[ create a new paste ] login | about

Link: http://codepad.org/JKEL5hlA    [ raw code | fork ]

C++, pasted on Nov 8:
#include "CodecUtf8.h"

// Copyright (c) 2011, Alf P. Steinbach


//--------------------------------------------------------- Dependencies:

#include <progrock/cppx/u/low_level/ConvertUTF.h>       // From Unicode consortium.
#include <algorithm>                                    // std::min


//--------------------------------------------------------- Implementation:

namespace progrock{ namespace cppx{ namespace u { namespace detail {
    using std::min;

    // Translates a result code from the ConvertUTF routines into the corresponding
    // std::codecvt_base::result value.
    //
    // Both "exhausted" codes map to `partial`: the conversion stopped early either
    // because the source ended in the middle of a sequence or because the target
    // buffer filled up.  A code that is not one of the four handled here yields
    // result(-1), which is not a valid facet result.
    static std::codecvt_base::result toFacetResult( ConversionResult r )
    {
        typedef std::codecvt_base Facet;

        switch( r )
        {
        case conversionOK:
            return Facet::ok;

        case sourceExhausted:
        case targetExhausted:
            return Facet::partial;

        case sourceIllegal:
            return Facet::error;

        default:
            return Facet::result( -1 );
        }
    }

    // Converts internal (UTF-16) characters in [from, from_end) to external (UTF-8)
    // bytes stored in [to, to_end).
    //
    // On return `from_next` and `to_next` hold the positions at which the ConvertUTF
    // routine left its source and target cursors.  The routine's result code is
    // mapped to a facet result by toFacetResult.
    //
    // `state` is unused.
    //
    // NOTE(review): the casts assume intern_type has the same size and representation
    // as UTF16, and extern_type the same as UTF8 -- confirm against the class
    // declaration.
    CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_out(
        state_type&                 state,
        intern_type const*          from,
        intern_type const*          from_end,
        intern_type const*&         from_next,
        extern_type*                to,
        extern_type*                to_end,
        extern_type*&               to_next
        ) const
    {
        (void) state;

        // The converter updates its cursors through pointer-to-pointer arguments.
        // Give it correctly typed local cursors instead of reinterpreting the
        // addresses of `from_next`/`to_next`: writing to an `intern_type const*`
        // object through a `UTF16 const**` (or to an `extern_type*` through a
        // `UTF8**`) is an aliasing violation.
        UTF16 const*    source  = reinterpret_cast< UTF16 const* >( from );
        UTF8*           target  = reinterpret_cast< UTF8* >( to );

        ConversionResult const conversionResult = ConvertUTF16toUTF8(
            &source,                                            // sourceStart
            reinterpret_cast< UTF16 const* >( from_end ),       // sourceEnd
            &target,                                            // targetStart
            reinterpret_cast< UTF8* >( to_end ),                // targetEnd
            lenientConversion                                   // flags
            );

        from_next = reinterpret_cast< intern_type const* >( source );
        to_next = reinterpret_cast< extern_type* >( target );
        return toFacetResult( conversionResult );
    }

    // Converts external (UTF-8) bytes in [from, from_end) to internal (UTF-16)
    // characters stored in [to, to_end).
    //
    // On return `from_next` and `to_next` hold the positions at which the ConvertUTF
    // routine left its source and target cursors.  The routine's result code is
    // mapped to a facet result by toFacetResult.
    //
    // `state` is unused.
    //
    // NOTE(review): the casts assume extern_type has the same size and representation
    // as UTF8, and intern_type the same as UTF16 -- confirm against the class
    // declaration.
    CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_in(
        state_type&                 state,
        extern_type const*          from,
        extern_type const*          from_end,
        extern_type const*&         from_next,
        intern_type*                to,
        intern_type*                to_end,
        intern_type*&               to_next
        ) const
    {
        (void) state;

        // The converter updates its cursors through pointer-to-pointer arguments.
        // Give it correctly typed local cursors instead of reinterpreting the
        // addresses of `from_next`/`to_next`: writing to an `extern_type const*`
        // object through a `UTF8 const**` (or to an `intern_type*` through a
        // `UTF16**`) is an aliasing violation.
        UTF8 const*     source  = reinterpret_cast< UTF8 const* >( from );
        UTF16*          target  = reinterpret_cast< UTF16* >( to );

        ConversionResult const conversionResult = ConvertUTF8toUTF16(
            &source,                                            // sourceStart
            reinterpret_cast< UTF8 const* >( from_end ),        // sourceEnd
            &target,                                            // targetStart
            reinterpret_cast< UTF16* >( to_end ),               // targetEnd
            lenientConversion                                   // flags
            );

        from_next = reinterpret_cast< extern_type const* >( source );
        to_next = reinterpret_cast< intern_type* >( target );
        return toFacetResult( conversionResult );
    }

    // Writes the characters needed to terminate an external sequence.  This facet
    // keeps no conversion state (`state` is ignored throughout), so nothing is
    // written and `noconv` is returned.
    //
    // `to_next` is still set (to `to`, since zero elements were stored): the codecvt
    // contract says it must point one past the last element stored, and some stream
    // implementations read it regardless of the result code.
    CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_unshift(
        state_type&                 state,
        extern_type*                to,
        extern_type*                to_end,
        extern_type*&               to_next
        ) const
    {
        (void) state;  (void) to_end;
        to_next = to;
        return noconv;
    }

    // Describes the width of the external encoding.  The possible answers are:
    //
    //      -1  the encoding of the external sequence is state-dependent
    //      n   a fixed number n of external chars per internal char
    //      0   variable length encoding
    //
    // This facet reports the variable-length case.
    int CodecUtf8_< 16 >::do_encoding() const CPPX_NOEXCEPT
    {
        int const variableLengthEncoding = 0;
        return variableLengthEncoding;
    }

    // Tells the caller whether conversion can be skipped entirely.  It cannot:
    // do_in and do_out perform real conversions, so the answer is always false.
    bool CodecUtf8_< 16 >::do_always_noconv() const CPPX_NOEXCEPT
    {
        bool const conversionIsIdentity = false;
        return conversionIsIdentity;
    }

    // Returns the number of external (UTF-8) bytes, counted from `from`, that make up
    // complete, well-formed sequences yielding at most `max_result` internal
    // (UTF-16) code units.  This is the `from_next - from` value the codecvt
    // contract asks for, not a character count.
    //
    // Scanning stops at the first of:
    //      - the end of the input,
    //      - a byte that cannot start a sequence (stray continuation byte, or a
    //        lead byte that never occurs in UTF-8),
    //      - a sequence that is truncated or has a non-continuation byte inside it,
    //      - a sequence whose UTF-16 form would not fit in the remaining budget.
    //
    // A 4-byte sequence encodes a code point above U+FFFF and therefore counts as
    // two UTF-16 units (a surrogate pair); every shorter sequence counts as one.
    // Overlong 3- and 4-byte forms and encoded surrogates are not rejected here.
    //
    // `state` is unused.
    //
    // See C++11 footnote
    // "242) Informally, this means that basic_filebuf assumes that the mappings from
    // internal to external characters is 1 to N: a codecvt facet that is used by
    // basic_filebuf must be able to translate characters one internal character at a
    // time."
    int CodecUtf8_< 16 >::do_length(
        state_type&                 state,
        extern_type const*          from,
        extern_type const*          from_end,
        size_t                      max_result
        ) const
    {
        typedef unsigned char UChar;

        (void) state;
        extern_type const* const    start   = from;
        size_t                      nUnits  = 0;        // UTF-16 units accounted for.

        while( from < from_end && nUnits < max_result )
        {
            UChar const lead = UChar( *from );

            // Total length of the sequence as announced by its lead byte; 0 means
            // the byte cannot start a sequence.
            size_t const seqLength =
                lead < 0x80?    1 :
                lead < 0xC2?    0 :     // Continuation byte, or overlong lead C0/C1.
                lead < 0xE0?    2 :
                lead < 0xF0?    3 :
                lead < 0xF5?    4 :
                                0;      // Would encode beyond U+10FFFF.

            if( seqLength == 0 || size_t( from_end - from ) < seqLength )
            {
                break;
            }

            bool isWellFormed = true;
            for( size_t i = 1;  i < seqLength;  ++i )
            {
                // Continuation bytes have the bit pattern 10xxxxxx.
                if( (UChar( from[i] ) & 0xC0) != 0x80 )
                {
                    isWellFormed = false;
                    break;
                }
            }
            if( !isWellFormed )
            {
                break;
            }

            size_t const unitsNeeded = (seqLength == 4? 2 : 1);
            if( unitsNeeded > max_result - nUnits )
            {
                break;      // A surrogate pair would exceed the caller's limit.
            }

            nUnits += unitsNeeded;
            from += seqLength;
        }

        return int( from - start );
    }

    // Returns an upper bound on the number of external (UTF-8) bytes that
    // correspond to a single character.
    //
    // The longest UTF-8 sequence is 4 bytes.  Stream buffers may size their
    // external buffers from this value, so it must not under-report: a value of 1
    // would be too small for any non-ASCII character.  4 is also the value the
    // standard library's own UTF-8 <-> UTF-16 facets report.
    int CodecUtf8_< 16 >::do_max_length() const CPPX_NOEXCEPT
    {
        return 4;
    }

} } } }  // namespace progrock::cppx::u::detail


Create a new paste based on this one


Comments: