{-# LANGUAGE CPP, MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances #-}
#if __GLASGOW_HASKELL__ >= 701
{-# LANGUAGE Trustworthy #-}
#endif
--
-- |
-- Module      :  Codec.Binary.UTF8.Generic
-- Copyright   :  (c) Iavor S. Diatchki 2009
-- License     :  BSD3-style (see LICENSE)
--
-- Maintainer  :  emertens@galois.com
-- Stability   :  experimental
-- Portability :  portable
--
module Codec.Binary.UTF8.Generic
  ( UTF8Bytes(..)
  , decode
  , replacement_char
  , uncons
  , splitAt
  , take
  , drop
  , span
  , break
  , fromString
  , toString
  , foldl
  , foldr
  , length
  , lines
  , lines'
  ) where

import Data.Bits
import Data.Int
import Data.Word
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.List as List
import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines,null,tail)

import Codec.Binary.UTF8.String(encode)

#ifdef BYTESTRING_IN_BASE
import Data.ByteString.Base (unsafeHead, unsafeTail)
#endif

class (Num s, Ord s) => UTF8Bytes b s | b -> s where
  bsplit        :: s -> b -> (b,b)
  bdrop         :: s -> b -> b
  buncons       :: b -> Maybe (Word8,b)
  elemIndex     :: Word8 -> b -> Maybe s
  empty         :: b
  null          :: b -> Bool
  pack          :: [Word8] -> b
  tail          :: b -> b

instance UTF8Bytes B.ByteString Int where
  bsplit :: Int -> ByteString -> (ByteString, ByteString)
bsplit        = Int -> ByteString -> (ByteString, ByteString)
B.splitAt
  bdrop :: Int -> ByteString -> ByteString
bdrop         = Int -> ByteString -> ByteString
B.drop
  buncons :: ByteString -> Maybe (Word8, ByteString)
buncons       = ByteString -> Maybe (Word8, ByteString)
unconsB
  elemIndex :: Word8 -> ByteString -> Maybe Int
elemIndex     = Word8 -> ByteString -> Maybe Int
B.elemIndex
  empty :: ByteString
empty         = ByteString
B.empty
  null :: ByteString -> Bool
null          = ByteString -> Bool
B.null
  pack :: [Word8] -> ByteString
pack          = [Word8] -> ByteString
B.pack
  tail :: ByteString -> ByteString
tail          = HasCallStack => ByteString -> ByteString
ByteString -> ByteString
B.tail

instance UTF8Bytes L.ByteString Int64 where
  bsplit :: Int64 -> ByteString -> (ByteString, ByteString)
bsplit        = Int64 -> ByteString -> (ByteString, ByteString)
L.splitAt
  bdrop :: Int64 -> ByteString -> ByteString
bdrop         = Int64 -> ByteString -> ByteString
L.drop
  buncons :: ByteString -> Maybe (Word8, ByteString)
buncons       = ByteString -> Maybe (Word8, ByteString)
unconsL
  elemIndex :: Word8 -> ByteString -> Maybe Int64
elemIndex     = Word8 -> ByteString -> Maybe Int64
L.elemIndex
  empty :: ByteString
empty         = ByteString
L.empty
  null :: ByteString -> Bool
null          = ByteString -> Bool
L.null
  pack :: [Word8] -> ByteString
pack          = [Word8] -> ByteString
L.pack
  tail :: ByteString -> ByteString
tail          = HasCallStack => ByteString -> ByteString
ByteString -> ByteString
L.tail

instance UTF8Bytes [Word8] Int where
  bsplit :: Int -> [Word8] -> ([Word8], [Word8])
bsplit          = Int -> [Word8] -> ([Word8], [Word8])
forall a. Int -> [a] -> ([a], [a])
List.splitAt
  bdrop :: Int -> [Word8] -> [Word8]
bdrop           = Int -> [Word8] -> [Word8]
forall a. Int -> [a] -> [a]
List.drop
  buncons :: [Word8] -> Maybe (Word8, [Word8])
buncons (Word8
x:[Word8]
xs)  = (Word8, [Word8]) -> Maybe (Word8, [Word8])
forall a. a -> Maybe a
Just (Word8
x,[Word8]
xs)
  buncons []      = Maybe (Word8, [Word8])
forall a. Maybe a
Nothing
  elemIndex :: Word8 -> [Word8] -> Maybe Int
elemIndex Word8
x [Word8]
xs  = Word8 -> [Word8] -> Maybe Int
forall a. Eq a => a -> [a] -> Maybe Int
List.elemIndex (Int -> Word8
forall a. Enum a => Int -> a
toEnum (Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
x)) [Word8]
xs
  empty :: [Word8]
empty           = []
  null :: [Word8] -> Bool
null            = [Word8] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
List.null
  pack :: [Word8] -> [Word8]
pack            = [Word8] -> [Word8]
forall a. a -> a
id
  tail :: [Word8] -> [Word8]
tail            = [Word8] -> [Word8]
forall a. HasCallStack => [a] -> [a]
List.tail

-- | Converts a Haskell string into a UTF8 encoded bytestring.
{-# SPECIALIZE fromString :: String -> B.ByteString  #-}
{-# SPECIALIZE fromString :: String -> L.ByteString  #-}
{-# SPECIALIZE fromString :: String -> [Word8] #-}
fromString :: UTF8Bytes b s => String -> b
fromString :: forall b s. UTF8Bytes b s => String -> b
fromString String
xs = [Word8] -> b
forall b s. UTF8Bytes b s => [Word8] -> b
pack (String -> [Word8]
encode String
xs)

-- | Convert a UTF8 encoded bytestring into a Haskell string.
-- Invalid characters are replaced with @\'\\0xFFFD\'@.
{-# SPECIALIZE toString :: B.ByteString -> String #-}
{-# SPECIALIZE toString :: L.ByteString -> String #-}
{-# SPECIALIZE toString :: [Word8] -> String #-}
toString :: UTF8Bytes b s => b -> String
toString :: forall b s. UTF8Bytes b s => b -> String
toString b
bs = (Char -> String -> String) -> String -> b -> String
forall b s a. UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
foldr (:) [] b
bs

-- | This character is used to mark errors in a UTF8 encoded string.
replacement_char :: Char
replacement_char :: Char
replacement_char = Char
'\xfffd'

-- | Try to extract a character from a byte string.
-- Returns 'Nothing' if there are no more bytes in the byte string.
-- Otherwise, it returns a decoded character and the number of
-- bytes used in its representation.
-- Errors are replaced by character @\'\\0xFFFD\'@.

-- XXX: Should we combine sequences of errors into a single replacement
-- character?
{-# SPECIALIZE decode :: B.ByteString -> Maybe (Char,Int) #-}
{-# SPECIALIZE decode :: L.ByteString -> Maybe (Char,Int64) #-}
{-# SPECIALIZE decode :: [Word8]      -> Maybe (Char,Int) #-}
decode :: UTF8Bytes b s => b -> Maybe (Char,s)
decode :: forall b s. UTF8Bytes b s => b -> Maybe (Char, s)
decode b
bs = do (Word8
c,b
cs) <- b -> Maybe (Word8, b)
forall b s. UTF8Bytes b s => b -> Maybe (Word8, b)
buncons b
bs
               (Char, s) -> Maybe (Char, s)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> b -> (Char, s)
forall {a} {b} {s} {b}.
(Enum a, Bits a, UTF8Bytes b s, Num a, Num b, Ord a) =>
a -> b -> (Char, b)
choose (Word8 -> Int
forall a. Enum a => a -> Int
fromEnum Word8
c) b
cs)
  where
  choose :: a -> b -> (Char, b)
choose a
c b
cs
    | a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0x80  = (Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ a -> Int
forall a. Enum a => a -> Int
fromEnum a
c, b
1)
    | a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0xc0  = (Char
replacement_char, b
1)
    | a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0xe0  = Int -> b -> (Char, b)
forall {b} {s} {b}. (UTF8Bytes b s, Num b) => Int -> b -> (Char, b)
bytes2 (a -> a -> Int
forall {a}. (Enum a, Bits a) => a -> a -> Int
mask a
c a
0x1f) b
cs
    | a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0xf0  = Int -> b -> (Char, b)
forall {b} {s} {b}. (UTF8Bytes b s, Num b) => Int -> b -> (Char, b)
bytes3 (a -> a -> Int
forall {a}. (Enum a, Bits a) => a -> a -> Int
mask a
c a
0x0f) b
cs
    | a
c a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0xf8  = Int -> b -> (Char, b)
forall {b} {s} {b}. (UTF8Bytes b s, Num b) => Int -> b -> (Char, b)
bytes4 (a -> a -> Int
forall {a}. (Enum a, Bits a) => a -> a -> Int
mask a
c a
0x07) b
cs
    | Bool
otherwise = (Char
replacement_char, b
1)

  mask :: a -> a -> Int
mask a
c a
m = a -> Int
forall a. Enum a => a -> Int
fromEnum (a
c a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
m)

  combine :: Int -> a -> Int
combine Int
acc a
r = Int -> Int -> Int
forall a. Bits a => a -> Int -> a
shiftL Int
acc Int
6 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. a -> Int
forall a. Enum a => a -> Int
fromEnum (a
r a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0x3f)

  follower :: Int -> a -> Maybe Int
follower Int
acc a
r | a
r a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0xc0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0x80 = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> a -> Int
forall {a}. (Enum a, Bits a, Num a) => Int -> a -> Int
combine Int
acc a
r)
  follower Int
_ a
_                        = Maybe Int
forall a. Maybe a
Nothing

  {-# INLINE get_follower #-}
  get_follower :: Int -> b -> Maybe (Int, b)
get_follower Int
acc b
cs = do (Word8
x,b
xs) <- b -> Maybe (Word8, b)
forall b s. UTF8Bytes b s => b -> Maybe (Word8, b)
buncons b
cs
                           Int
acc1 <- Int -> Word8 -> Maybe Int
forall {a}. (Bits a, Num a, Enum a) => Int -> a -> Maybe Int
follower Int
acc Word8
x
                           (Int, b) -> Maybe (Int, b)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int
acc1,b
xs)

  bytes2 :: Int -> b -> (Char, b)
bytes2 Int
c b
cs = case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
c b
cs of
                  Just (Int
d, b
_) | Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x80  -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, b
2)
                              | Bool
otherwise  -> (Char
replacement_char, b
1)
                  Maybe (Int, b)
_ -> (Char
replacement_char, b
1)

  bytes3 :: Int -> b -> (Char, b)
bytes3 Int
c b
cs =
    case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
c b
cs of
      Just (Int
d1, b
cs1) ->
        case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
d1 b
cs1 of
          Just (Int
d, b
_) | (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x800 Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xd800) Bool -> Bool -> Bool
||
                        (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0xdfff Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xfffe) -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, b
3)
                      | Bool
otherwise -> (Char
replacement_char, b
3)
          Maybe (Int, b)
_ -> (Char
replacement_char, b
2)
      Maybe (Int, b)
_ -> (Char
replacement_char, b
1)

  bytes4 :: Int -> b -> (Char, b)
bytes4 Int
c b
cs =
    case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
c b
cs of
      Just (Int
d1, b
cs1) ->
        case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
d1 b
cs1 of
          Just (Int
d2, b
cs2) ->
            case Int -> b -> Maybe (Int, b)
forall {b} {s}. UTF8Bytes b s => Int -> b -> Maybe (Int, b)
get_follower Int
d2 b
cs2 of
              Just (Int
d,b
_) | Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x10000 Bool -> Bool -> Bool
&& Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0x110000 -> (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
d, b
4)
                         | Bool
otherwise                    -> (Char
replacement_char, b
4)
              Maybe (Int, b)
_ -> (Char
replacement_char, b
3)
          Maybe (Int, b)
_ -> (Char
replacement_char, b
2)
      Maybe (Int, b)
_ -> (Char
replacement_char, b
1)


-- | Split after a given number of characters.
-- Negative values are treated as if they are 0.
{-# SPECIALIZE splitAt :: Int   -> B.ByteString -> (B.ByteString,B.ByteString) #-}
{-# SPECIALIZE splitAt :: Int64 -> L.ByteString -> (L.ByteString,L.ByteString) #-}
{-# SPECIALIZE splitAt :: Int   -> [Word8] -> ([Word8],[Word8])    #-}
splitAt :: UTF8Bytes b s => s -> b -> (b,b)
splitAt :: forall b s. UTF8Bytes b s => s -> b -> (b, b)
splitAt s
x b
bs = s -> s -> b -> (b, b)
forall {t} {t}.
(Num t, UTF8Bytes t s, Ord t) =>
s -> t -> t -> (b, b)
loop s
0 s
x b
bs
  where loop :: s -> t -> t -> (b, b)
loop s
a t
n t
_ | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
<= t
0 = s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
bsplit s
a b
bs
        loop s
a t
n t
bs1 = case t -> Maybe (Char, s)
forall b s. UTF8Bytes b s => b -> Maybe (Char, s)
decode t
bs1 of
                         Just (Char
_,s
y) -> s -> t -> t -> (b, b)
loop (s
as -> s -> s
forall a. Num a => a -> a -> a
+s
y) (t
nt -> t -> t
forall a. Num a => a -> a -> a
-t
1) (s -> t -> t
forall b s. UTF8Bytes b s => s -> b -> b
bdrop s
y t
bs1)
                         Maybe (Char, s)
Nothing    -> (b
bs, b
forall b s. UTF8Bytes b s => b
empty)

-- | @take n s@ returns the first @n@ characters of @s@.
-- If @s@ has less than @n@ characters, then we return the whole of @s@.
{-# INLINE take #-}
take :: UTF8Bytes b s => s -> b -> b
take :: forall b s. UTF8Bytes b s => s -> b -> b
take s
n b
bs = (b, b) -> b
forall a b. (a, b) -> a
fst (s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
splitAt s
n b
bs)

-- | @drop n s@ returns the @s@ without its first @n@ characters.
-- If @s@ has less than @n@ characters, then we return an empty string.
{-# INLINE drop #-}
drop :: UTF8Bytes b s => s -> b -> b
drop :: forall b s. UTF8Bytes b s => s -> b -> b
drop s
n b
bs = (b, b) -> b
forall a b. (a, b) -> b
snd (s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
splitAt s
n b
bs)

-- | Split a string into two parts:  the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
{-# SPECIALIZE span :: (Char -> Bool) -> B.ByteString -> (B.ByteString,B.ByteString) #-}
{-# SPECIALIZE span :: (Char -> Bool) -> L.ByteString -> (L.ByteString,L.ByteString) #-}
{-# SPECIALIZE span :: (Char -> Bool) -> [Word8] -> ([Word8],[Word8])    #-}
span :: UTF8Bytes b s => (Char -> Bool) -> b -> (b,b)
span :: forall b s. UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
span Char -> Bool
p b
bs = s -> b -> (b, b)
forall {t}. UTF8Bytes t s => s -> t -> (b, b)
loop s
0 b
bs
  where loop :: s -> t -> (b, b)
loop s
a t
cs = case t -> Maybe (Char, s)
forall b s. UTF8Bytes b s => b -> Maybe (Char, s)
decode t
cs of
                      Just (Char
c,s
n) | Char -> Bool
p Char
c -> s -> t -> (b, b)
loop (s
as -> s -> s
forall a. Num a => a -> a -> a
+s
n) (s -> t -> t
forall b s. UTF8Bytes b s => s -> b -> b
bdrop s
n t
cs)
                      Maybe (Char, s)
_ -> s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
bsplit s
a b
bs

-- | Split a string into two parts:  the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
{-# INLINE break #-}
break :: UTF8Bytes b s => (Char -> Bool) -> b -> (b,b)
break :: forall b s. UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
break Char -> Bool
p b
bs = (Char -> Bool) -> b -> (b, b)
forall b s. UTF8Bytes b s => (Char -> Bool) -> b -> (b, b)
span (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
p) b
bs

-- | Get the first character of a byte string, if any.
-- Malformed characters are replaced by @\'\\0xFFFD\'@.
{-# INLINE uncons #-}
uncons :: UTF8Bytes b s => b -> Maybe (Char,b)
uncons :: forall b s. UTF8Bytes b s => b -> Maybe (Char, b)
uncons b
bs = do (Char
c,s
n) <- b -> Maybe (Char, s)
forall b s. UTF8Bytes b s => b -> Maybe (Char, s)
decode b
bs
               (Char, b) -> Maybe (Char, b)
forall a. a -> Maybe a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
c, s -> b -> b
forall b s. UTF8Bytes b s => s -> b -> b
bdrop s
n b
bs)

-- | Traverse a bytestring (right biased).
{-# SPECIALIZE foldr :: (Char -> a -> a) -> a -> B.ByteString -> a #-}
{-# SPECIALIZE foldr :: (Char -> a -> a) -> a -> L.ByteString -> a #-}
{-# SPECIALIZE foldr :: (Char -> a -> a) -> a -> [Word8]      -> a #-}
foldr :: UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
foldr :: forall b s a. UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
foldr Char -> a -> a
cons a
nil b
cs = case b -> Maybe (Char, b)
forall b s. UTF8Bytes b s => b -> Maybe (Char, b)
uncons b
cs of
                      Just (Char
a,b
as) -> Char -> a -> a
cons Char
a ((Char -> a -> a) -> a -> b -> a
forall b s a. UTF8Bytes b s => (Char -> a -> a) -> a -> b -> a
foldr Char -> a -> a
cons a
nil b
as)
                      Maybe (Char, b)
Nothing     -> a
nil

-- | Traverse a bytestring (left biased).
-- This function is strict in the accumulator.
{-# SPECIALIZE foldl :: (a -> Char -> a) -> a -> B.ByteString -> a #-}
{-# SPECIALIZE foldl :: (a -> Char -> a) -> a -> L.ByteString -> a #-}
{-# SPECIALIZE foldl :: (a -> Char -> a) -> a -> [Word8]      -> a #-}
foldl :: UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a
foldl :: forall b s a. UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a
foldl a -> Char -> a
add a
acc b
cs  = case b -> Maybe (Char, b)
forall b s. UTF8Bytes b s => b -> Maybe (Char, b)
uncons b
cs of
                      Just (Char
a,b
as) -> let v :: a
v = a -> Char -> a
add a
acc Char
a
                                     in a -> a -> a
forall a b. a -> b -> b
seq a
v ((a -> Char -> a) -> a -> b -> a
forall b s a. UTF8Bytes b s => (a -> Char -> a) -> a -> b -> a
foldl a -> Char -> a
add a
v b
as)
                      Maybe (Char, b)
Nothing     -> a
acc

-- | Counts the number of characters encoded in the bytestring.
-- Note that this includes replacement characters.
{-# SPECIALIZE length :: B.ByteString -> Int #-}
{-# SPECIALIZE length :: L.ByteString -> Int64 #-}
{-# SPECIALIZE length :: [Word8]      -> Int #-}
length :: UTF8Bytes b s => b -> s
length :: forall b s. UTF8Bytes b s => b -> s
length b
b = s -> b -> s
forall {t} {s} {t}. (UTF8Bytes t s, Num t) => t -> t -> t
loop s
0 b
b
  where loop :: t -> t -> t
loop t
n t
xs = case t -> Maybe (Char, s)
forall b s. UTF8Bytes b s => b -> Maybe (Char, s)
decode t
xs of
                      Just (Char
_,s
m) -> t -> t -> t
loop (t
nt -> t -> t
forall a. Num a => a -> a -> a
+t
1) (s -> t -> t
forall b s. UTF8Bytes b s => s -> b -> b
bdrop s
m t
xs)
                      Maybe (Char, s)
Nothing -> t
n

-- | Split a string into a list of lines.
-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- See also 'lines''.
{-# SPECIALIZE lines :: B.ByteString -> [B.ByteString] #-}
{-# SPECIALIZE lines :: L.ByteString -> [L.ByteString] #-}
{-# SPECIALIZE lines :: [Word8]      -> [[Word8]]       #-}
lines :: UTF8Bytes b s => b -> [b]
lines :: forall b s. UTF8Bytes b s => b -> [b]
lines b
bs | b -> Bool
forall b s. UTF8Bytes b s => b -> Bool
null b
bs  = []
lines b
bs = case Word8 -> b -> Maybe s
forall b s. UTF8Bytes b s => Word8 -> b -> Maybe s
elemIndex Word8
10 b
bs of
             Just s
x -> let (b
xs,b
ys) = s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
bsplit s
x b
bs
                       in b
xs b -> [b] -> [b]
forall a. a -> [a] -> [a]
: b -> [b]
forall b s. UTF8Bytes b s => b -> [b]
lines (b -> b
forall b s. UTF8Bytes b s => b -> b
tail b
ys)
             Maybe s
Nothing -> [b
bs]

-- | Split a string into a list of lines.
-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
{-# SPECIALIZE lines' :: B.ByteString -> [B.ByteString] #-}
{-# SPECIALIZE lines' :: L.ByteString -> [L.ByteString] #-}
{-# SPECIALIZE lines' :: [Word8]      -> [[Word8]]      #-}
lines' :: UTF8Bytes b s => b -> [b]
lines' :: forall b s. UTF8Bytes b s => b -> [b]
lines' b
bs | b -> Bool
forall b s. UTF8Bytes b s => b -> Bool
null b
bs  = []
lines' b
bs = case Word8 -> b -> Maybe s
forall b s. UTF8Bytes b s => Word8 -> b -> Maybe s
elemIndex Word8
10 b
bs of
              Just s
x -> let (b
xs,b
ys) = s -> b -> (b, b)
forall b s. UTF8Bytes b s => s -> b -> (b, b)
bsplit (s
xs -> s -> s
forall a. Num a => a -> a -> a
+s
1) b
bs
                        in b
xs b -> [b] -> [b]
forall a. a -> [a] -> [a]
: b -> [b]
forall b s. UTF8Bytes b s => b -> [b]
lines' b
ys
              Maybe s
Nothing -> [b
bs]

-----------
-- Compatibility functions for base-2

unconsB :: B.ByteString -> Maybe (Word8,B.ByteString)
unconsL :: L.ByteString -> Maybe (Word8,L.ByteString)

#ifdef BYTESTRING_IN_BASE
unconsB bs | B.null bs = Nothing
           | otherwise = Just (unsafeHead bs, unsafeTail bs)

unconsL bs = case L.toChunks bs of
    (x:xs) | not (B.null x)     -> Just (unsafeHead x, L.fromChunks (unsafeTail x:xs))
    _                           -> Nothing
#else
unconsB :: ByteString -> Maybe (Word8, ByteString)
unconsB = ByteString -> Maybe (Word8, ByteString)
B.uncons
unconsL :: ByteString -> Maybe (Word8, ByteString)
unconsL = ByteString -> Maybe (Word8, ByteString)
L.uncons
#endif