エンジニアのソフトウェア的愛情

または私は如何にして心配するのを止めてプログラムを愛するようになったか

続・S式パーサ

文字列(二重引用符で囲まれた文字列)と整数値を識別するようにしました。
パースに失敗したときは、gotoで後始末に跳ぶようになっています。この部分は当初、例外で実装しようとしたのですが、「パースエラーは例外じゃないだろう」ということで書き直しました。いろいろ言われるgotoですが、うまく使えばやっぱり便利です。例外処理機構も、用途を限定した一種のgotoですし。

main関数の部分は前回と同じです。

sexp_parser.h

#ifndef SEXP_PARSER_H
#define SEXP_PARSER_H

#include <iosfwd>
#include <string>

// Position type used by the parser: a read-only iterator into a std::string.
typedef std::string::const_iterator iterator;
// Character type of the parsed text (std::string's element type).
typedef std::string::value_type     value_type;

// Abstract base class of every S-expression node returned by Parser.
class Sexp
{
public:
    // Virtual so that a derived node can be destroyed through a Sexp*.
    virtual ~Sexp() {}

    // Writes a textual representation of this node to `out`.
    virtual void write(std::ostream& out) const = 0;
};

// A symbol-like token; keeps a copy of the characters it was built from.
class Atom : public Sexp
{
public:
    // Builds the atom from the characters in the range [begin, end).
    Atom(iterator begin, iterator end);

    // Writes this atom to `out`.
    void write(std::ostream& out) const;

    // Returns the stored text of the atom.
    const std::string& asString() const;

private:
    std::string s_;  // the atom's text
};

// An integer literal node holding an int value.
class Integer : public Sexp
{
public:
    // Builds the node by converting the numeric text `s` to an int.
    explicit Integer(const std::string& s);

    // Writes this integer to `out`.
    void write(std::ostream& out) const;

    // Returns the stored integer value.
    int getValue() const;

private:
    int n_;  // the converted value
};

// A string literal node; holds the literal's contents.
class String : public Sexp
{
public:
    // Builds the node with a copy of `s`.
    explicit String(const std::string& s);

    // Writes this string to `out`.
    void write(std::ostream& out) const;

    // Returns the stored string contents.
    const std::string& getValue() const;

private:
    std::string s_;  // the literal's contents
};

// A cons cell: owns two child nodes (car and cdr) and deletes them when
// it is destroyed.
class Pair : public Sexp
{
public:
    // Takes ownership of both `car` and `cdr`; they are deleted in ~Pair().
    Pair(Sexp* car, Sexp* cdr);
    ~Pair();

    // Writes this pair, including both children, to `out`.
    void write(std::ostream& out) const;

    // Accessors for the owned children. The pointers are dereferenced
    // unconditionally, so both must be non-null.
    const Sexp& car() const;
    const Sexp& cdr() const;

private:
    // Not copyable: a compiler-generated copy would share car_/cdr_ with the
    // original and both destructors would delete the same nodes.
    // Declared but intentionally never defined.
    Pair(const Pair&);
    Pair& operator=(const Pair&);

    Sexp* car_;
    Sexp* cdr_;
};

// The empty list / list terminator node. Carries no data.
class Nil : public Sexp
{
public:
    // Writes the nil marker to `out`.
    void write(std::ostream& out) const;
};

// Stream insertion for any S-expression node; forwards to Sexp::write().
std::ostream& operator << (std::ostream& out, const Sexp& sexp);

// Recursive-descent parser that turns text into a tree of Sexp nodes.
//
// Every read* helper returns a newly allocated node on success and a null
// pointer on failure; the caller owns the returned node.
//
// NOTE(review): the helpers receive no end iterator and test for '\0' to
// detect the end of input, so the text is presumably expected to be
// null-terminated at or before the end of the range - confirm with callers.
class Parser
{
public:
    Parser();

    // Parses one S-expression starting at `begin`. `begin` is passed by
    // reference and is advanced as input is consumed.
    // Returns the parsed node (owned by the caller) or null on failure.
    Sexp* parse(iterator& begin, iterator end);

private:
    // Advances `i` past any whitespace characters.
    void skipWhitespace(iterator& i) const;
    // True when `c` may be part of an atom (i.e. is not a delimiter).
    bool isAtomChar(value_type c) const;

    Sexp* readAtom(iterator& i);     // symbol-like token
    Sexp* readInteger(iterator& i);  // integer literal
    Sexp* readString(iterator& i);   // double-quoted string literal
    Sexp* readPair(iterator& i);     // dotted pair "(a . b)"
    Sexp* readList(iterator& i);     // parenthesised list, possibly empty
    Sexp* readSexp(iterator& i);     // any of the above, tried in turn

    // Reads the elements of a list (the part between the parentheses).
    Sexp* readRawList(iterator& i);
};

#endif//SEXP_PARSER_H

sexp_parser.cpp

#include "sexp_parser.h"

#include <cctype>
#include <climits>
#include <cstdlib>
#include <iostream>

// Characters that have a special meaning to the parser.
static const char eos            = '\0';  // end-of-input sentinel
static const char dot            = '.';   // separator inside a dotted pair
static const char leftBracket    = '(';
static const char rightBracket   = ')';
static const char doubeQuoteMark = '\"';  // (sic) spelling kept: used throughout this file
static const char yenMark        = '\\';  // backslash, the escape character in string literals

// Returns true when `c` terminates a token: whitespace, end of input,
// a dot, or either parenthesis.
bool isDelimiter(char c)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF). A plain char may be negative for bytes >= 0x80
    // (e.g. multibyte text), so convert before classifying.
    const bool whitespace = std::isspace(static_cast<unsigned char>(c)) != 0;

    return whitespace
        || (c == eos)
        || (c == dot)
        || (c == leftBracket)
        || (c == rightBracket);
}

// Stream insertion for S-expression nodes: dispatches to the node's virtual
// write() and returns the stream so that insertions can be chained.
std::ostream& operator << (std::ostream& out, const Sexp& sexp)
{
    sexp.write(out);
    return out;
}

// Builds an atom whose text is the characters in [begin, end).
Atom::Atom(iterator begin, iterator end)
    : Sexp()
    , s_()
{
    s_.assign(begin, end);
}

// Writes the atom as "[ ATOM:<text> ]".
void Atom::write(std::ostream& out) const
{
    out << "[ ATOM:";
    out << s_;
    out << " ]";
}

// Returns a reference to the atom's stored text.
const std::string& Atom::asString() const
{
    return s_;
}

// Builds an integer node from the decimal text `s`.
//
// The text is converted with strtol rather than atoi, because atoi's result
// is undefined when the value does not fit in an int. Values outside the int
// range are clamped to INT_MAX / INT_MIN. Text with no leading number
// converts to 0, as it did with atoi.
Integer::Integer(const std::string& s) : Sexp(), n_(0)
{
    // strtol itself saturates at LONG_MAX / LONG_MIN on overflow, so the
    // clamping below also covers inputs too large for a long.
    const long parsed = std::strtol(s.c_str(), 0, 10);

    if(parsed > INT_MAX)
    {
        n_ = INT_MAX;
    }
    else if(parsed < INT_MIN)
    {
        n_ = INT_MIN;
    }
    else
    {
        n_ = static_cast<int>(parsed);
    }
}

// Writes the integer as "[ Integer: <value> ]".
void Integer::write(std::ostream& out) const
{
    out << "[ Integer: ";
    out << n_;
    out << " ]";
}

// Returns the stored integer value.
int Integer::getValue() const
{
    return n_;
}

// Builds a string node holding a copy of `s`.
String::String(const std::string& s) : Sexp(), s_(s)
{
}

// Writes the string as "[ STRING:<contents> ]" (contents are not re-escaped).
void String::write(std::ostream& out) const
{
    out << "[ STRING:";
    out << s_;
    out << " ]";
}

// Returns a reference to the stored string contents.
const std::string& String::getValue() const
{
    return s_;
}

// Stores both child pointers. The pair takes ownership: the destructor
// deletes them.
Pair::Pair(Sexp* car, Sexp* cdr) : Sexp(), car_(car), cdr_(cdr)
{
}

// Destroys both owned children (deleting a null pointer is a no-op).
Pair::~Pair()
{
    delete car_;
    delete cdr_;
}

// Writes the pair as "[ PAIR:<car>.<cdr> ]", recursing into both children.
// Both children are dereferenced, so they must be non-null.
void Pair::write(std::ostream& out) const
{
    out << "[ PAIR:";
    car_->write(out);
    out << ".";
    cdr_->write(out);
    out << " ]";
}

// Returns the first element of the pair. Dereferences car_ unconditionally.
const Sexp& Pair::car() const
{
    return *car_;
}

// Returns the second element of the pair. Dereferences cdr_ unconditionally.
const Sexp& Pair::cdr() const
{
    return *cdr_;
}

// Writes the fixed marker "[ NIL ]".
void Nil::write(std::ostream& out) const
{
    out << "[ NIL ]";
}

// The parser has no data members to initialise.
Parser::Parser()
{
}

// Parses one S-expression starting at `begin`.
//
// begin  in/out: start of the text; advanced as input is consumed.
// end    one past the last character of the text.
//
// Returns a newly allocated node owned by the caller, or null when the text
// does not start with a valid S-expression (including an empty range).
//
// NOTE: apart from the empty-range check below, the read* helpers do not
// receive `end`; they stop on '\0'. A non-empty range must therefore be
// followed by a '\0' terminator.
Sexp* Parser::parse(iterator& begin, iterator end)
{
    // Nothing to parse; also avoids dereferencing `begin` when it equals
    // `end`.
    if(begin == end)
    {
        return 0;
    }

    return readSexp(begin);
}

// Advances `i` to the first non-whitespace character at or after its
// current position. Stops at '\0', which is not whitespace.
void Parser::skipWhitespace(iterator& i) const
{
    // std::isspace is only defined for values representable as unsigned char
    // (or EOF); a plain char can be negative for bytes >= 0x80, so convert
    // before classifying.
    while(std::isspace(static_cast<unsigned char>(*i)))
    {
        ++i;
    }
}

// Returns true when `c` may appear inside an atom, i.e. it is not the
// end-of-input sentinel, whitespace, a dot, or a parenthesis.
bool Parser::isAtomChar(value_type c) const
{
    // std::isspace is only defined for values representable as unsigned char
    // (or EOF); convert first so bytes >= 0x80 are handled safely.
    const bool whitespace = std::isspace(static_cast<unsigned char>(c)) != 0;

    const bool delimiter = (c == eos)
                        || whitespace
                        || (c == dot)
                        || (c == leftBracket)
                        || (c == rightBracket);

    return ! delimiter;
}

// Reads an atom: after skipping whitespace, consumes the longest run of
// atom characters. Returns a new Atom, or null when the run is empty.
Sexp* Parser::readAtom(iterator& i)
{
    skipWhitespace(i);

    const iterator first(i);
    for(; isAtomChar(*i); ++i)
    {
        // consume one atom character per iteration
    }

    // No characters consumed: there is no atom at this position.
    if(first == i)
    {
        return 0;
    }

    return new Atom(first, i);
}

// Reads an unsigned decimal integer literal after skipping whitespace.
//
// Accepted forms: a single "0", or a non-zero digit followed by any number
// of digits. A '0' followed by another digit (leading zero) is rejected.
// Returns a new Integer on success. On failure returns null and leaves `i`
// at the first non-whitespace character.
Sexp* Parser::readInteger(iterator& i)
{
    skipWhitespace(i);

    const iterator begin(i);

    // std::isdigit is only defined for values representable as unsigned char
    // (or EOF), hence the conversions below.
    if(! std::isdigit(static_cast<unsigned char>(*i)))
    {
        return 0;
    }

    if(*i == '0')
    {
        // A lone zero is a valid integer; numbers with a leading zero
        // ("007") are not.
        ++i;
        if(std::isdigit(static_cast<unsigned char>(*i)))
        {
            i = begin;
            return 0;
        }
    }
    else
    {
        do
        {
            ++i;
        }
        while(std::isdigit(static_cast<unsigned char>(*i)));
    }

    return new Integer(std::string(begin, i));
}

// Reads a double-quoted string literal after skipping whitespace.
//
// Inside the quotes a backslash escapes a backslash or a double quote; any
// other escape is an error. Returns a new String holding the unescaped
// contents, or null when the text is not a string literal, contains an
// invalid escape, or ends before the closing quote.
Sexp* Parser::readString(iterator& i)
{
    skipWhitespace(i);

    if(*i != doubeQuoteMark) { return 0; }
    ++i;

    std::string result;
    for(;;)
    {
        const value_type c = *i;

        // End of input before the closing quote: unterminated literal.
        if(c == eos)
        {
            return 0;
        }
        ++i;

        if(c == doubeQuoteMark)
        {
            return new String(result);
        }

        if(c != yenMark)
        {
            result += c;
            continue;
        }

        // Escape sequence: append the escaped character exactly once and
        // step past it, so that it is not appended a second time as an
        // ordinary character.
        const value_type escaped = *i;
        if((escaped != yenMark) && (escaped != doubeQuoteMark))
        {
            return 0;
        }
        result += escaped;
        ++i;
    }
}

// Reads a dotted pair of the form "( <sexp> . <sexp> )".
// Returns a new Pair owning both parsed children, or null on any mismatch;
// children that were already parsed are deleted before a failure return.
Sexp* Parser::readPair(iterator& i)
{
    skipWhitespace(i);
    if(*i != leftBracket)
    {
        return 0;
    }
    ++i;

    skipWhitespace(i);

    Sexp* const head = readSexp(i);
    if(head == 0)
    {
        return 0;
    }

    skipWhitespace(i);
    if(*i != dot)
    {
        delete head;
        return 0;
    }
    ++i;

    Sexp* const tail = readSexp(i);
    if(tail == 0)
    {
        delete head;
        return 0;
    }

    skipWhitespace(i);
    if(*i != rightBracket)
    {
        delete head;
        delete tail;
        return 0;
    }
    ++i;

    return new Pair(head, tail);
}

// Reads a parenthesised list "( ... )".
// The elements are parsed by readRawList(); when it yields nothing and the
// closing parenthesis follows, the result is a new Nil. Returns null when
// either parenthesis is missing.
Sexp* Parser::readList(iterator& i)
{
    skipWhitespace(i);
    if(*i != leftBracket)
    {
        return 0;
    }
    ++i;

    Sexp* const elements = readRawList(i);

    skipWhitespace(i);
    if(*i != rightBracket)
    {
        // Missing ')': discard whatever was parsed so far.
        delete elements;
        return 0;
    }
    ++i;

    if(elements == 0)
    {
        return new Nil;
    }
    return elements;
}

// Reads the elements of a list (the text between the parentheses) as a
// chain of Pair nodes.
//
// Parses one element, then recurses for the rest. When no further element
// follows, the chain ends either with ". <sexp>" (an improper tail) or with
// a new Nil. Returns null when no element can be read here or when an
// element is not followed by a delimiter.
Sexp* Parser::readRawList(iterator& i)
{
    Sexp* const head = readSexp(i);
    if(head == 0)
    {
        return 0;
    }

    // Each element must be followed by a delimiter.
    if(! isDelimiter(*i))
    {
        delete head;
        return 0;
    }

    Sexp* tail = readRawList(i);
    if(tail == 0)
    {
        skipWhitespace(i);

        if(*i == dot)
        {
            // Explicit tail: ". <sexp>"
            ++i;
            skipWhitespace(i);

            tail = readSexp(i);
            if(tail == 0)
            {
                delete head;
                return 0;
            }
        }
        else
        {
            // Proper list: terminate the chain with Nil.
            tail = new Nil;
        }
    }

    return new Pair(head, tail);
}

// Reads any S-expression by trying each reader in a fixed order: string,
// integer, atom, dotted pair, list. `i` is rewound to the starting position
// before each attempt. Returns the first successful result, or null when
// every reader fails (`i` is then left wherever the last reader left it).
Sexp* Parser::readSexp(iterator& i)
{
    typedef Sexp* (Parser::*Reader)(iterator&);

    const Reader readers[] =
    {
        &Parser::readString,
        &Parser::readInteger,
        &Parser::readAtom,
        &Parser::readPair,
        &Parser::readList
    };
    const unsigned readerCount = sizeof(readers) / sizeof(readers[0]);

    const iterator start(i);
    for(unsigned k = 0; k < readerCount; ++k)
    {
        i = start;

        Sexp* const parsed = (this->*readers[k])(i);
        if(parsed != 0)
        {
            return parsed;
        }
    }

    return 0;
}