エンジニアのソフトウェア的愛情

または私は如何にして心配するのを止めてプログラムを・愛する・ようになったか

diffをつくる(3) -後編

任意のコンテナに対してdiffをとれるようにテンプレートにしました。
ポイントはテンプレートになっているのが、diffの入り口Diff::ond関数だけというところです。このためdiffを求めるプログラムの大半をバイナリとしてライブラリ化することもできます。またオブジェクトの内部に記憶しているのは操作の手順のみなので、stringでもvectorでも、おなじオブジェクトでdiffをとることができます。

実行例。

$ cat hello_c++.txt 
Hello
    C++!

$ cat hello_diff.txt 
Hello
    diff!

$ ./my_diff3_2 hello_c++.txt hello_diff.txt 
 FILE NAME: 7
   h
   e
   l
   l
   o
   _
 - c
 - +
 - +
 + d
 + i
 + f
 + f
   .
   t
   x
   t

 CONTENTS: 2
   Hello
 -     C++!
 +     diff!


以下コード。

#include <string>
#include <vector>
#include <algorithm>
#include <exception>

class Diff
{
public:
    class Exception : public std::exception
    {
    public:
        explicit Exception(const char* what) : std::exception(), what_(what) {}
        ~Exception() throw() {}
        const char* what() const throw() { return what_.c_str(); }

    private:
        std::string what_;
    };

    enum EditType
    {
        DELETE,
        COMMON,
        ADD
    };

    typedef std::vector<EditType> EditSequence;

    struct TreeNode
    {
        TreeNode(EditType editType, int prev) : editType(editType), prev(prev) {}

        EditType editType;
        int      prev;
    };

    static const int NO_LINK = -1;

    struct VItem
    {

        VItem() : y(0), tail(NO_LINK) {}
        VItem(int y, int tail) : y(y), tail(tail) {}

        int y;
        int tail;
    };

    Diff() : tree_(), tail_(NO_LINK)
    {
    }

    struct Comparer
    {
        virtual ~Comparer() {}
        virtual bool isEqual(int x, int y) const = 0;
    };

    int ondImpl(int sizeA, int sizeB, const Comparer& comparer)
    {
        const int& offset = sizeA;

        VItem* v = new VItem[sizeA + sizeB + 1];

        tree_.clear();
        tail_ = NO_LINK;

        for(int d = 0; d <= sizeA + sizeB; ++d)
        {
            for(int k = -d; k <= d; k += 2)
            {
                if((k < -sizeA) || (sizeB < k))
                {
                    continue;
                }

                VItem* v_k   = v + k + offset;
                VItem* v_kp1 = v_k + 1;
                VItem* v_km1 = v_k - 1;

                if(d != 0)
                {
                    if(((k == -d) || (k == -sizeA)) || (((k != d) && (k != sizeB)) && ((v_km1->y + 1) < v_kp1->y)))
                    {
                        v_k->y    = v_kp1->y;
                        v_k->tail = tree_.size();
                        tree_.push_back(TreeNode(DELETE, v_kp1->tail));
                    }
                    else
                    {
                        v_k->y    = v_km1->y + 1;
                        v_k->tail = tree_.size();
                        tree_.push_back(TreeNode(ADD, v_km1->tail));
                    }
                }

                while(((v_k->y - k) < sizeA) && (v_k->y < sizeB) && comparer.isEqual(v_k->y - k, v_k->y))
                {
                    TreeNode node(COMMON, v_k->tail);
                    v_k->tail = tree_.size();
                    tree_.push_back(node);
                    ++v_k->y;
                }

                if(((v_k->y - k) >= sizeA) && (v_k->y >= sizeB))
                {
                    tail_ = v_k->tail;
                    delete[] v;
                    return d;
                }
            }
        }
        delete[] v;
        throw Exception("not found");
    }

    template<typename sequence_t>
    int ond(const sequence_t& sequenceA, const sequence_t& sequenceB)
    {
        struct ComparerImpl : public Comparer
        {
            sequence_t sequenceA;
            sequence_t sequenceB;

            ComparerImpl(const sequence_t& sequenceA, const sequence_t& sequenceB) : sequenceA(sequenceA), sequenceB(sequenceB) {}

            bool isEqual(int x, int y) const
            {
                return sequenceA[x] == sequenceB[y];
            }
        };

        return ondImpl(sequenceA.size(), sequenceB.size(), ComparerImpl(sequenceA, sequenceB));
    }

    void getSES(EditSequence& ses) const
    {
        ses.clear();
        for(int i = tail_; i != NO_LINK; i = tree_[i].prev)
        {
            ses.push_back(tree_[i].editType);
        }
        std::reverse(ses.begin(), ses.end());
    }

private:
    std::vector<TreeNode> tree_;
    int                   tail_;
};

#include <iostream>
#include <fstream>

std::ostream& operator << (std::ostream& out, Diff::EditType editType)
{
    switch(editType)
    {
    case Diff::DELETE: out << '-'; break;
    case Diff::COMMON: out << ' '; break;
    case Diff::ADD:    out << '+'; break;
    default:           out << '?'; break;
    }
    return out;
}

void load(const std::string& filename, std::vector<std::string>& strings)
{
    strings.clear();

    std::ifstream src(filename.c_str());
    std::string   s;
    while(std::getline(src, s).good())
    {
        strings.push_back(s);
    }
}

template<typename sequence_t>
void showDiff(const Diff::EditSequence& ses, const sequence_t& sequenceA, const sequence_t& sequenceB)
{
    typename sequence_t::const_iterator a = sequenceA.begin();
    typename sequence_t::const_iterator b = sequenceB.begin();
    for(std::vector<Diff::EditType>::const_iterator i = ses.begin(); i != ses.end(); ++i)
    {
        std::cout << *i << " ";
        switch(*i)
        {
        case Diff::DELETE:
            std::cout << *a;
            ++a;
            break;

        case Diff::COMMON:
            std::cout << *a;
            ++a;
            ++b;
            break;

        case Diff::ADD:
            std::cout << *b;
            ++b;
            break;

        default:
            break;
        }
        std::cout << "\n";
    }
}

int main(int argc, char* argv[])
{
    if(argc != 3)
    {
        std::cout << argv[0] << " <file_a> <file_b>\n";
        return 0;
    }

    try
    {
        Diff               diff;
        Diff::EditSequence ses;

        std::string filenameA(argv[1]);
        std::string filenameB(argv[2]);

        std::cout << "FILE NAME: " << diff.ond(filenameA, filenameB) << "\n";
        diff.getSES(ses);
        showDiff(ses, filenameA, filenameB);

        std::cout << "\n";

        std::vector<std::string> stringsA;
        std::vector<std::string> stringsB;

        load(filenameA, stringsA);
        load(filenameB, stringsB);

        std::cout << "CONTENTS: " << diff.ond(stringsA, stringsB) << "\n";
        diff.getSES(ses);
        showDiff(ses, stringsA, stringsB);
    }
    catch(const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
    }

    return 0;
}