{-# LANGUAGE MagicHash #-}
module Basement.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import GHC.Prim
import GHC.Word
import GHC.Types
import qualified Prelude
import Basement.Compat.Base
import Basement.Compat.Primitive
import Basement.IntegralConv
import Basement.Bits
import Basement.Types.OffsetSize
import Basement.Monad
import Basement.Numerical.Additive
import Basement.Numerical.Subtractive
import Basement.UArray
import Basement.UArray.Mutable (MUArray)
import Basement.MutableBuilder
import Basement.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Int -> UTF16_Invalid -> ShowS
[UTF16_Invalid] -> ShowS
UTF16_Invalid -> String
(Int -> UTF16_Invalid -> ShowS)
-> (UTF16_Invalid -> String)
-> ([UTF16_Invalid] -> ShowS)
-> Show UTF16_Invalid
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UTF16_Invalid -> ShowS
showsPrec :: Int -> UTF16_Invalid -> ShowS
$cshow :: UTF16_Invalid -> String
show :: UTF16_Invalid -> String
$cshowList :: [UTF16_Invalid] -> ShowS
showList :: [UTF16_Invalid] -> ShowS
Show, UTF16_Invalid -> UTF16_Invalid -> Bool
(UTF16_Invalid -> UTF16_Invalid -> Bool)
-> (UTF16_Invalid -> UTF16_Invalid -> Bool) -> Eq UTF16_Invalid
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UTF16_Invalid -> UTF16_Invalid -> Bool
== :: UTF16_Invalid -> UTF16_Invalid -> Bool
$c/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
/= :: UTF16_Invalid -> UTF16_Invalid -> Bool
Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext :: UTF16
-> (Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
encodingNext UTF16
_ = (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
(Offset (Unit UTF16) -> Unit UTF16)
-> Offset (Unit UTF16)
-> Either (Error UTF16) (Char, Offset (Unit UTF16))
next
encodingWrite :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
UTF16
-> Char
-> Builder
(UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
encodingWrite UTF16
_ = Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
Char
-> Builder
(UArray (Unit UTF16)) (MUArray (Unit UTF16)) (Unit UTF16) st err ()
forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next :: (Offset Word16 -> Word16)
-> Offset Word16 -> Either UTF16_Invalid (Char, Offset Word16)
next Offset Word16 -> Word16
getter Offset Word16
off
| Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xd800 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
| Word16
h Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xe000 = (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Word16 -> Char
toChar16 Word16
h, Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1)
| Bool
otherwise = Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
where
h :: Word16
!h :: Word16
h = Offset Word16 -> Word16
getter Offset Word16
off
to32 :: Word16 -> Word32
to32 :: Word16 -> Word32
to32 (W16# Word16#
w) = Word32# -> Word32
W32# (Word16# -> Word32#
word16ToWord32# Word16#
w)
toChar16 :: Word16 -> Char
toChar16 :: Word16 -> Char
toChar16 (W16# Word16#
w) = Char# -> Char
C# (Word32# -> Char#
word32ToChar# (Word16# -> Word32#
word16ToWord32# Word16#
w))
nextContinuation :: Either UTF16_Invalid (Char, Offset Word16)
nextContinuation
| Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xdc00 Bool -> Bool -> Bool
&& Word16
cont Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xe00 =
let !(W32# Word32#
w) = ((Word16 -> Word32
to32 Word16
h Word32 -> Word32 -> Word32
forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff) Word32 -> CountOf Bool -> Word32
forall bits. BitOps bits => bits -> CountOf Bool -> bits
.<<. CountOf Bool
10) Word32 -> Word32 -> Word32
forall bits. BitOps bits => bits -> bits -> bits
.|. (Word16 -> Word32
to32 Word16
cont Word32 -> Word32 -> Word32
forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff)
in (Char, Offset Word16) -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. b -> Either a b
Right (Char# -> Char
C# (Word32# -> Char#
word32ToChar# Word32#
w), Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
2)
| Bool
otherwise = UTF16_Invalid -> Either UTF16_Invalid (Char, Offset Word16)
forall a b. a -> Either a b
Left UTF16_Invalid
InvalidContinuation
where
cont :: Word16
!cont :: Word16
cont = Offset Word16 -> Word16
getter (Offset Word16 -> Word16) -> Offset Word16 -> Word16
forall a b. (a -> b) -> a -> b
$ Offset Word16
off Offset Word16 -> Offset Word16 -> Offset Word16
forall a. Additive a => a -> a -> a
+ Int -> Offset Word16
forall ty. Int -> Offset ty
Offset Int
1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write :: forall (st :: * -> *) err.
(PrimMonad st, Monad st) =>
Char -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
write Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xd800 = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10000 = let (Word16
w1, Word16
w2) = Char -> (Word16, Word16)
wHigh Char
c in Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w1 Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b.
Builder (UArray Word16) (MUArray Word16) Word16 st err a
-> Builder (UArray Word16) (MUArray Word16) Word16 st err b
-> Builder (UArray Word16) (MUArray Word16) Word16 st err b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend Word16
w2
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x10ffff = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0xe000 = Word16 -> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall ty (state :: * -> *) err.
(PrimType ty, PrimMonad state) =>
ty -> Builder (UArray ty) (MUArray ty) ty state err ()
builderAppend (Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> Word16
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> Word16
w16 Char
c
| Bool
otherwise = UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a e. Exception e => e -> a
throw (UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ())
-> UTF16_Invalid
-> Builder (UArray Word16) (MUArray Word16) Word16 st err ()
forall a b. (a -> b) -> a -> b
$ Char -> UTF16_Invalid
InvalidUnicode Char
c
where
w16 :: Char -> Word16
w16 :: Char -> Word16
w16 (C# Char#
ch) = Word16# -> Word16
W16# (Word# -> Word16#
wordToWord16# (Int# -> Word#
int2Word# (Char# -> Int#
ord# Char#
ch)))
to16 :: Word32 -> Word16
to16 :: Word32 -> Word16
to16 = Word32 -> Word16
forall a b. (Integral a, Num b) => a -> b
Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh :: Char -> (Word16, Word16)
wHigh (C# Char#
ch) =
let v :: Difference Word32
v = Word32# -> Word32
W32# (Char# -> Word32#
charToWord32# Char#
ch) Word32 -> Word32 -> Difference Word32
forall a. Subtractive a => a -> a -> Difference a
- Word32
0x10000
in (Word16
0xdc00 Word16 -> Word16 -> Word16
forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Word32
Difference Word32
v Word32 -> CountOf Bool -> Word32
forall bits. BitOps bits => bits -> CountOf Bool -> bits
.>>. CountOf Bool
10), Word16
0xd800 Word16 -> Word16 -> Word16
forall bits. BitOps bits => bits -> bits -> bits
.|. Word32 -> Word16
to16 (Word32
Difference Word32
v Word32 -> Word32 -> Word32
forall bits. BitOps bits => bits -> bits -> bits
.&. Word32
0x3ff))