[ create a new paste ] login | about

Link: http://codepad.org/qNmEkqgy    [ raw code | fork ]

fisherro - C++, pasted on Nov 5:
//See comments to...
//http://programmingpraxis.com/2015/10/30/reverse-string-ignoring-special-characters/#comments
//Requires C++14.
#include <iostream>
#include <string>
#include <memory>
#include <iterator>
#include <algorithm>

//Iterator adaptor that walks a range of UTF-8 code units one encoded
//character at a time.
//
//InnerIter is the iterator type of the underlying byte range (for example
//std::string::iterator or const char*). It must be at least a forward
//iterator. Dereferencing yields a std::string, by value, holding the bytes of
//the character at the current position.
//
//The input is not validated: any byte with the high bit set is treated as the
//start of a multibyte character, and every continuation byte (10xxxxxx) that
//directly follows it is taken to belong to that character.
//
//Dereferencing or incrementing an iterator that is at the end of the range is
//undefined, as with any other iterator.
template<typename InnerIter>
class Utf8_iterator
{
public:
    //Iterator traits are spelled out as member types because the
    //std::iterator helper base class is deprecated as of C++17.
    using iterator_category = std::forward_iterator_tag;
    using value_type = std::string;
    using difference_type =
        typename std::iterator_traits<InnerIter>::difference_type;
    using pointer = void;
    //operator* builds its result on the fly, so it returns by value.
    using reference = std::string;
private:
    //Element type of the underlying range; iterator_traits (rather than
    //InnerIter::value_type) keeps raw pointers usable as InnerIter.
    using code_unit = typename std::iterator_traits<InnerIter>::value_type;

    InnerIter inner{};
    InnerIter inner_end{};

    //Returns the position just past the character starting at inner.
    InnerIter next_boundary() const
    {
        auto boundary = std::next(inner);
        if (is_multibyte(*inner)) {
            boundary = std::find_if_not(boundary, inner_end,
                    is_continuation_byte);
        }
        return boundary;
    }
public:
    //True if c has its high bit set, i.e. it is not a plain 7-bit character.
    static bool is_multibyte(code_unit c)
    {
        return (0b1000'0000 & c) != 0;
    }
    //True if c has the bit pattern 10xxxxxx of a UTF-8 continuation byte.
    static bool is_continuation_byte(code_unit c)
    {
        return (0b1000'0000 & c) && !(0b0100'0000 & c);
    }
    //Default construction gives value-initialized inner iterators.
    Utf8_iterator() = default;
    //i is the current position; end bounds the search for the end of a
    //multibyte character.
    Utf8_iterator(InnerIter i, InnerIter end): inner(i), inner_end(end) {}
    //Returns the bytes of the character at the current position.
    std::string operator*() const
    {
        return std::string(inner, next_boundary());
    }
    //Pre-increment: advances to the start of the next character and returns
    //this iterator by reference so that chained increments act on it.
    Utf8_iterator& operator++()
    {
        inner = next_boundary();
        return *this;
    }
    //Post-increment: advances and returns a copy of the previous state.
    Utf8_iterator operator++(int)
    {
        auto old = *this;
        ++(*this);
        return old;
    }
    //Two iterators are equal when they refer to the same position; the end
    //bound does not take part in the comparison.
    bool operator==(const Utf8_iterator& that) const
    {
        return inner == that.inner;
    }
    bool operator!=(const Utf8_iterator& that) const
    {
        return !(*this == that);
    }
};

auto make_utf8_iter(std::string& s)
{
    return Utf8_iterator<std::string::iterator>(s.begin(), s.end());
}

auto make_utf8_end(std::string& s)
{
    return Utf8_iterator<std::string::iterator>(s.end(), s.end());
}

//Demo: prints a UTF-8 string as a whole, then prints each of its characters
//on its own line by walking it with Utf8_iterator.
int main()
{
    std::string text(u8"This is a Unicode Character: \u2018.");
    std::cout << text << '\n';

    auto stop = make_utf8_end(text);
    for (auto pos = make_utf8_iter(text); pos != stop; ++pos) {
        std::cout << *pos << "\n";
    }
}


Create a new paste based on this one


Comments: