-- ------------------------------------------------------------

{- |
   Module     : Data.String.UTF8Decoding
   Copyright  : Copyright (C) 2010- Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Interface for Data.String.UTF8 functions

-}

-- ------------------------------------------------------------

module Data.String.UTF8Decoding (
   decodeUtf8,
   decodeUtf8EmbedErrors,
   decodeUtf8IgnoreErrors,
   )
where

import qualified Data.String.UTF8 as UTF8
import           Data.Word (Word8)

-- | Decode a UTF-8 byte sequence that is given as a 'String'.
--
-- The input is turned into a list of bytes with @stringToByteString@
-- (one byte per 'Char') and handed to @decode@ from "Data.String.UTF8".
--
-- The result is a pair of the decoded text and one message per decoding
-- error; each message is built by @toErrStr@ from the error value and the
-- input position reported with it.
decodeUtf8      :: String -> (String, [String])
decodeUtf8 str
    = (res, map (uncurry toErrStr) errs)
    where
    -- errs is the list of (error, position) pairs delivered by the decoder
    (res, errs) = UTF8.decode . stringToByteString $ str

-- | Decode UTF-8 with 'decodeUtf8' and return only the decoded text;
-- the list of error messages is discarded.
decodeUtf8IgnoreErrors  :: String -> String
decodeUtf8IgnoreErrors
    = fst . decodeUtf8

-- | Decode UTF-8 and keep the errors embedded in the output list.
--
-- The input is converted with @stringToByteString@ and passed to
-- @decodeEmbedErrors@ from "Data.String.UTF8". Every 'Right' element
-- (a decoded 'Char') is passed through unchanged; every 'Left' element
-- (an error paired with its input position) is replaced by the message
-- that @toErrStr@ builds from it.
decodeUtf8EmbedErrors   :: String -> [Either String Char]
decodeUtf8EmbedErrors str
    = map (either (Left . uncurry toErrStr) Right) $
      UTF8.decodeEmbedErrors $ stringToByteString str

-- | Convert a 'String' into a list of bytes, one 'Word8' per 'Char'.
--
-- Each character is mapped to the byte with the same numeric code via
-- 'fromEnum' and 'toEnum'. NOTE: 'toEnum' for 'Word8' is only defined for
-- 0..255, so the input is expected to contain only characters up to
-- @'\\255'@; larger code points make 'toEnum' fail at runtime.
stringToByteString :: String -> [Word8]
stringToByteString = map (toEnum . fromEnum)

-- | Build the message text for a single decoding error.
--
-- The result has the form @" at input position \<pos\>: \<err\>"@, where
-- both the position and the error are rendered with 'show'. Note the
-- leading blank: the text starts with a space.
toErrStr :: UTF8.Error -> Int -> String
toErrStr err pos
        = " at input position " ++ show pos ++ ": " ++ show err

-- ------------------------------------------------------------