codepad
[
create a new paste
]
login
|
about
Language:
C
C++
D
Haskell
Lua
OCaml
PHP
Perl
Plain Text
Python
Ruby
Scheme
Tcl
//See comments to...
//http://programmingpraxis.com/2015/10/30/reverse-string-ignoring-special-characters/#comments
//Requires C++14.
#include <iostream>
#include <string>
#include <memory>
#include <iterator>
#include <algorithm>
#include <cstddef>

// Iterator adaptor that walks a byte sequence one UTF-8 code point at a time.
//
// It wraps a pair of iterators over bytes (the current position and the end
// of the underlying sequence). Dereferencing yields a std::string holding the
// bytes of the code point at the current position; incrementing moves to the
// first byte of the following code point.
//
// No validation is performed: a byte with the high bit set is treated as the
// start of a multi-byte sequence, and the sequence extends over every
// continuation byte (10xxxxxx) that follows it.
//
// The iterator is an input iterator: operator* returns a freshly built
// std::string by value, so it cannot satisfy the forward-iterator requirement
// that dereferencing yields a real reference.
template<typename InnerIter>
class Utf8_iterator {
public:
    using iterator_category = std::input_iterator_tag;
    using value_type        = std::string;
    using difference_type   = std::ptrdiff_t;
    using pointer           = const std::string*;
    using reference         = std::string;  // returned by value, see above

private:
    // Element type of the wrapped iterator; iterator_traits also works for
    // raw pointers, which have no nested value_type.
    using byte_type = typename std::iterator_traits<InnerIter>::value_type;

    InnerIter inner;      // first byte of the current code point
    InnerIter inner_end;  // end of the underlying byte sequence

    // Returns the position one past the last byte of the current code point.
    // Precondition: inner != inner_end.
    InnerIter code_point_end() const {
        // std::next (rather than inner + 1) keeps this usable with inner
        // iterators that are not random access.
        auto after_lead = std::next(inner);
        if (is_multibyte(*inner)) {
            return std::find_if_not(after_lead, inner_end, is_continuation_byte);
        }
        return after_lead;
    }

public:
    // True when the high bit of c is set, i.e. c is not a 7-bit ASCII byte.
    static bool is_multibyte(byte_type c) {
        // Test the bits on an unsigned value so the result does not depend
        // on whether plain char is signed.
        return (static_cast<unsigned char>(c) & 0b1000'0000) != 0;
    }

    // True when c has the bit pattern 10xxxxxx (a UTF-8 continuation byte).
    static bool is_continuation_byte(byte_type c) {
        return (static_cast<unsigned char>(c) & 0b1100'0000) == 0b1000'0000;
    }

    // i is the starting position; end is the end of the underlying sequence
    // and bounds the scan for continuation bytes.
    Utf8_iterator(InnerIter i, InnerIter end): inner(i), inner_end(end) {}

    // Returns the bytes of the current code point as a string.
    // Must not be called on an end iterator.
    std::string operator*() const {
        return std::string(inner, code_point_end());
    }

    // Pre-increment: advance to the next code point and return *this by
    // reference, so that chained increments act on this object and not on
    // a temporary copy.
    Utf8_iterator& operator++() {
        inner = code_point_end();
        return *this;
    }

    // Post-increment: advance, returning a copy of the previous position.
    Utf8_iterator operator++(int) {
        auto old = *this;
        ++(*this);
        return old;
    }

    // Two iterators are equal when they point at the same underlying byte.
    bool operator==(const Utf8_iterator& that) const {
        return inner == that.inner;
    }

    bool operator!=(const Utf8_iterator& that) const {
        return !(*this == that);
    }
};

// Returns an iterator positioned at the first code point of s.
auto make_utf8_iter(std::string& s) {
    return Utf8_iterator<std::string::iterator>(s.begin(), s.end());
}

// Returns the past-the-end iterator matching make_utf8_iter(s).
auto make_utf8_end(std::string& s) {
    return Utf8_iterator<std::string::iterator>(s.end(), s.end());
}

// Prints a UTF-8 string, then prints each of its code points on its own line.
int main() {
    std::string s(u8"This is a Unicode Character: \u2018.");
    std::cout << s << '\n';
    auto first = make_utf8_iter(s);
    auto last = make_utf8_end(s);
    std::copy(first, last, std::ostream_iterator<std::string>(std::cout, "\n"));
}
Private
[
?
]
Run code
Submit