//See the comments on this Programming Praxis exercise:
//http://programmingpraxis.com/2015/10/30/reverse-string-ignoring-special-characters/#comments
//Requires C++14.
#include <iostream>
#include <string>
#include <memory>
#include <iterator>
#include <algorithm>
//Iterator adaptor that walks a UTF-8 encoded byte range one code point at a
//time. Dereferencing yields the bytes of the current code point as a
//std::string (by value, so no reference into the underlying range is exposed).
//
//InnerIter is the underlying byte iterator, e.g. std::string::iterator or
//const char*. It only needs to be a forward iterator.
//
//Advancing never scans past the `end` passed to the constructor, so a
//truncated trailing sequence is returned as-is. Dereferencing or incrementing
//an iterator that already equals `end` is not checked and must be avoided by
//the caller.
//
//Malformed input is not rejected: a stray continuation byte at the current
//position is treated as the start of a sequence and grouped with any
//continuation bytes that follow it.
template<typename InnerIter>
class Utf8_iterator
{
private:
    //Element type of the underlying range; iterator_traits (rather than
    //InnerIter::value_type) keeps raw pointers usable as InnerIter.
    using byte_type = typename std::iterator_traits<InnerIter>::value_type;

    InnerIter inner;
    InnerIter inner_end;

    //Returns the position one past the last byte of the code point that
    //starts at `inner`. Shared by operator* and operator++ so both always
    //agree on where a code point ends.
    InnerIter code_point_end() const
    {
        if (is_multibyte(*inner)) {
            //Skip the lead byte, then consume continuation bytes until the
            //next lead/ASCII byte or the end of the range.
            return std::find_if_not(std::next(inner), inner_end,
                                    is_continuation_byte);
        }
        return std::next(inner);
    }
public:
    //Member typedefs replace the std::iterator base class, which is
    //deprecated since C++17. The category is kept as forward_iterator_tag
    //for compatibility with existing users even though operator* returns
    //by value; `reference` reflects what operator* really returns.
    using iterator_category = std::forward_iterator_tag;
    using value_type = std::string;
    using difference_type =
        typename std::iterator_traits<InnerIter>::difference_type;
    using pointer = std::string*;
    using reference = std::string;

    //True when the high bit of c is set, i.e. c belongs to a multi-byte
    //sequence (either a lead byte or a continuation byte).
    static bool is_multibyte(byte_type c)
    {
        return (0b1000'0000 & c) != 0;
    }
    //True when c has the bit pattern 10xxxxxx (high bit set, next bit clear).
    static bool is_continuation_byte(byte_type c)
    {
        return (0b1000'0000 & c) && !(0b0100'0000 & c);
    }

    //i is the current position, end is the end of the underlying byte range.
    Utf8_iterator(InnerIter i, InnerIter end): inner(i), inner_end(end) {}

    //Returns a copy of the bytes that make up the current code point.
    std::string operator*() const
    {
        return std::string(inner, code_point_end());
    }
    //Pre-increment: moves to the start of the next code point and returns
    //a reference to this iterator.
    Utf8_iterator& operator++()
    {
        inner = code_point_end();
        return *this;
    }
    //Post-increment: advances this iterator and returns its previous value.
    Utf8_iterator operator++(int)
    {
        auto old = *this;
        ++(*this);
        return old;
    }
    //Two iterators are equal when they point at the same underlying byte;
    //the stored end positions are not compared.
    bool operator==(const Utf8_iterator& that) const
    {
        return inner == that.inner;
    }
    bool operator!=(const Utf8_iterator& that) const
    {
        return !(*this == that);
    }
};
auto make_utf8_iter(std::string& s)
{
return Utf8_iterator<std::string::iterator>(s.begin(), s.end());
}
auto make_utf8_end(std::string& s)
{
return Utf8_iterator<std::string::iterator>(s.end(), s.end());
}
//Demo: prints a sample string containing one non-ASCII character, then
//prints each of its code points on a separate line.
int main()
{
    std::string text(u8"This is a Unicode Character: \u2018.");
    std::cout << text << '\n';

    auto stop = make_utf8_end(text);
    for (auto cursor = make_utf8_iter(text); cursor != stop; ++cursor) {
        //Each dereference yields the bytes of one code point.
        std::cout << *cursor << "\n";
    }
}