#include "CodecUtf8.h"
// Copyright (c) 2011, Alf P. Steinbach
//--------------------------------------------------------- Dependencies:
#include <progrock/cppx/u/low_level/ConvertUTF.h> // From Unicode consortium.
#include <algorithm> // std::min
//--------------------------------------------------------- Implementation:
namespace progrock{ namespace cppx{ namespace u { namespace detail {
using std::min;
// Translates a result code from the ConvertUTF routines into the corresponding
// std::codecvt_base result code.
//
// Both "source exhausted" and "target exhausted" are reported as `partial`.
// Any code not listed below is passed on as `result(-1)`, which is presumably
// none of the named codecvt_base enumerators.
static std::codecvt_base::result toFacetResult( ConversionResult r )
{
    typedef std::codecvt_base Facet;

    switch( r )
    {
    case conversionOK:      return Facet::ok;
    case sourceExhausted:   return Facet::partial;
    case targetExhausted:   return Facet::partial;
    case sourceIllegal:     return Facet::error;
    default:                return Facet::result( -1 );
    }
}
// Converts internal (UTF-16) characters in [from, from_end) to external
// (UTF-8) chars stored in [to, to_end).
//
// On return `from_next` points just past the last source element consumed and
// `to_next` just past the last destination element written, as reported by
// ConvertUTF16toUTF8. The return value is that routine's result code mapped
// through toFacetResult. The conversion state is not used.
CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_out(
    state_type&             state,
    intern_type const*      from,
    intern_type const*      from_end,
    intern_type const*&     from_next,
    extern_type*            to,
    extern_type*            to_end,
    extern_type*&           to_next
    ) const
{
    (void) state;

    // Use correctly typed local cursors for the converter's in/out pointer
    // arguments. Casting `&from_next` / `&to_next` to `UTF16 const**` / `UTF8**`
    // would make the converter access a pointer object through an lvalue of
    // an unrelated pointer type, which violates the aliasing rules.
    // NOTE(review): like the original casts, this assumes intern_type has the
    // same size and representation as UTF16 -- confirm in CodecUtf8.h.
    UTF16 const*    source  = reinterpret_cast< UTF16 const* >( from );
    UTF8*           target  = reinterpret_cast< UTF8* >( to );

    ConversionResult const conversionResult = ConvertUTF16toUTF8(
        &source,                                        // sourceStart
        reinterpret_cast< UTF16 const* >( from_end ),   // sourceEnd
        &target,                                        // targetStart
        reinterpret_cast< UTF8* >( to_end ),            // targetEnd
        lenientConversion                               // flags
        );

    from_next   = reinterpret_cast< intern_type const* >( source );
    to_next     = reinterpret_cast< extern_type* >( target );
    return toFacetResult( conversionResult );
}
// Converts external (UTF-8) chars in [from, from_end) to internal (UTF-16)
// characters stored in [to, to_end).
//
// On return `from_next` points just past the last source element consumed and
// `to_next` just past the last destination element written, as reported by
// ConvertUTF8toUTF16. The return value is that routine's result code mapped
// through toFacetResult. The conversion state is not used.
CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_in(
    state_type&             state,
    extern_type const*      from,
    extern_type const*      from_end,
    extern_type const*&     from_next,
    intern_type*            to,
    intern_type*            to_end,
    intern_type*&           to_next
    ) const
{
    (void) state;

    // Use correctly typed local cursors for the converter's in/out pointer
    // arguments. Casting `&from_next` / `&to_next` to `UTF8 const**` / `UTF16**`
    // would make the converter access a pointer object through an lvalue of
    // an unrelated pointer type, which violates the aliasing rules.
    // NOTE(review): like the original casts, this assumes intern_type has the
    // same size and representation as UTF16 -- confirm in CodecUtf8.h.
    UTF8 const*     source  = reinterpret_cast< UTF8 const* >( from );
    UTF16*          target  = reinterpret_cast< UTF16* >( to );

    ConversionResult const conversionResult = ConvertUTF8toUTF16(
        &source,                                        // sourceStart
        reinterpret_cast< UTF8 const* >( from_end ),    // sourceEnd
        &target,                                        // targetStart
        reinterpret_cast< UTF16* >( to_end ),           // targetEnd
        lenientConversion                               // flags
        );

    from_next   = reinterpret_cast< extern_type const* >( source );
    to_next     = reinterpret_cast< intern_type* >( target );
    return toFacetResult( conversionResult );
}
// Writes the chars needed to return the external sequence to its initial
// shift state. This facet never uses the conversion state, so there is
// nothing to write: the function stores no chars and returns `noconv`.
//
// `to_next` is set to `to` (i.e. "zero chars written") so that the caller's
// out-parameter is never left indeterminate, e.g. when it computes
// `to_next - to` without first inspecting the result code.
CodecUtf8_< 16 >::result CodecUtf8_< 16 >::do_unshift(
    state_type&     state,
    extern_type*    to,
    extern_type*    to_end,
    extern_type*&   to_next
    ) const
{
    (void) state; (void) to_end;
    to_next = to;
    return noconv;
}
// Describes the external encoding's width. The possible answers are:
//
//   -1     the encoding of the extern sequence is state-dependent,
//    n > 0 every internal char corresponds to exactly n extern chars,
//    0     the encoding is variable length.
//
// This facet reports the variable length case.
int CodecUtf8_< 16 >::do_encoding() const CPPX_NOEXCEPT
{
    int const variableLengthEncoding = 0;
    return variableLengthEncoding;
}
// Tells whether do_in and do_out are guaranteed to be identity conversions.
// They are not: this facet always reports `false`.
bool CodecUtf8_< 16 >::do_always_noconv() const CPPX_NOEXCEPT
{
    bool const conversionIsAlwaysIdentity = false;
    return conversionIsAlwaysIdentity;
}
// Returns the number of extern (UTF-8) chars at the start of [from, from_end)
// that make up complete, structurally valid sequences yielding at most
// `max_result` internal (UTF-16) chars -- i.e. `from_next - from` in the terms
// of the std::codecvt contract. This is a byte count, not a character count.
//
// Scanning stops at the first of:
//   * the end of the range,
//   * a sequence that is cut off by `from_end`,
//   * a byte that cannot start a sequence, or a sequence whose trailing bytes
//     are not continuation bytes (10xxxxxx),
//   * a sequence whose UTF-16 form would not fit in the remaining budget.
//
// A 4-byte sequence encodes a code point beyond the BMP and is therefore
// counted as two UTF-16 chars (a surrogate pair); every shorter sequence
// counts as one. Note that this means the mapping is not strictly "1 internal
// char to N external chars", which C++11 footnote 242 says basic_filebuf
// informally assumes.
//
// The structural check here is deliberately simple: it rejects the lead bytes
// 0xC0, 0xC1 and 0xF5..0xFF but does not detect overlong 3/4-byte forms or
// encoded surrogates. NOTE(review): do_in may be stricter or more lenient
// for such input -- confirm against ConvertUTF8toUTF16 if exact agreement
// matters.
//
// The conversion state is not used.
int CodecUtf8_< 16 >::do_length(
    state_type&             state,
    extern_type const*      from,
    extern_type const*      from_end,
    size_t                  max_result
    ) const
{
    typedef unsigned char UChar;
    (void) state;

    extern_type const*  next    = from;     // One past the last byte accepted.
    size_t              nUnits  = 0;        // UTF-16 chars accounted for so far.

    while( next < from_end && nUnits < max_result )
    {
        // Go via unsigned char so that bytes >= 0x80 compare correctly also
        // where plain char is signed.
        UChar const lead = UChar( *next );

        // Sequence length implied by the lead byte; 0 = not a valid lead byte.
        int const seqLength =
            lead < 0x80? 1
            : lead < 0xC2? 0        // Continuation byte, or overlong lead C0/C1.
            : lead < 0xE0? 2
            : lead < 0xF0? 3
            : lead < 0xF5? 4
            : 0;                    // F5..FF never occur in UTF-8.

        if( seqLength == 0 || from_end - next < seqLength )
        {
            break;                  // Malformed or incomplete sequence.
        }

        bool hasValidTail = true;
        for( int i = 1; i < seqLength; ++i )
        {
            if( (UChar( next[i] ) & 0xC0) != 0x80 )
            {
                hasValidTail = false;
                break;
            }
        }
        if( !hasValidTail )
        {
            break;
        }

        // nUnits < max_result holds here, so the subtraction cannot wrap.
        size_t const unitsNeeded = (seqLength == 4? 2 : 1);
        if( unitsNeeded > max_result - nUnits )
        {
            break;                  // A surrogate pair would not fit.
        }

        next    += seqLength;
        nUnits  += unitsNeeded;
    }
    return int( next - from );
}
// Returns an upper bound on the number of extern (UTF-8) chars that can
// correspond to a single internal character.
//
// A single UTF-16 code unit in the BMP needs up to 3 UTF-8 bytes, and a code
// point outside the BMP (a surrogate pair) needs 4. The value 4 is used as
// a conservative bound, which is also what the standard library's own
// UTF-8 <-> UTF-16 facets report. Returning 1 here would let stream buffers
// that size their conversion buffers from max_length() under-allocate.
int CodecUtf8_< 16 >::do_max_length() const CPPX_NOEXCEPT
{
    int const maxUtf8BytesPerCharacter = 4;
    return maxUtf8BytesPerCharacter;
}
} } } } // namespace progrock::cppx::u::detail