-- ------------------------------------------------------------

{- |
   Module     : Data.Set.CharSet
   Copyright  : Copyright (C) 2010 - Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Char sets implemeted as non-overlapping ordered lists of intervalls

-}

-- ------------------------------------------------------------

module Data.Set.CharSet
    ( CharSet
    , emptyCS
    , allCS
    , singleCS
    , stringCS
    , rangeCS
    , nullCS
    , fullCS
    , unionCS
    , diffCS
    , intersectCS
    , exorCS
    , compCS
    , elemCS
    , toListCS
    )
where

-- ------------------------------------------------------------

type CharSet            = [(Char, Char)]

emptyCS                 :: CharSet
emptyCS :: CharSet
emptyCS                 = []
{-# INLINE emptyCS #-}

allCS                   :: CharSet
allCS :: CharSet
allCS                   = [(Char
forall a. Bounded a => a
minBound, Char
forall a. Bounded a => a
maxBound)]
{-# INLINE allCS #-}

singleCS                :: Char -> CharSet
singleCS :: Char -> CharSet
singleCS Char
c              = [(Char
c,Char
c)]
{-# INLINE singleCS #-}

stringCS                :: String -> CharSet
stringCS :: String -> CharSet
stringCS                = (Char -> CharSet -> CharSet) -> CharSet -> String -> CharSet
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (CharSet -> CharSet -> CharSet
unionCS (CharSet -> CharSet -> CharSet)
-> (Char -> CharSet) -> Char -> CharSet -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> CharSet
singleCS) CharSet
emptyCS
{-# INLINE stringCS #-}

rangeCS                 :: Char -> Char -> CharSet
rangeCS :: Char -> Char -> CharSet
rangeCS Char
l Char
u
    | Char
l Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
u            = [(Char
l,Char
u)]
    | Bool
otherwise         = CharSet
emptyCS
{-# INLINE rangeCS #-}

nullCS                  :: CharSet -> Bool
nullCS :: CharSet -> Bool
nullCS                  = CharSet -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null
{-# INLINE nullCS #-}

fullCS                  :: CharSet -> Bool
fullCS :: CharSet -> Bool
fullCS [(Char
lb, Char
ub)]
                        = Char
lb Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
minBound
                          Bool -> Bool -> Bool
&&
                          Char
ub Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
maxBound
fullCS CharSet
_                = Bool
False

elemCS                  :: Char -> CharSet -> Bool
elemCS :: Char -> CharSet -> Bool
elemCS Char
i                = ((Char, Char) -> Bool -> Bool) -> Bool -> CharSet -> Bool
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\ (Char
lb, Char
ub) Bool
b -> Char
i Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
lb Bool -> Bool -> Bool
&& (Char
i Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
ub Bool -> Bool -> Bool
|| Bool
b)) Bool
False
{-# INLINE elemCS #-}

toListCS                :: CharSet -> [Char]
toListCS :: CharSet -> String
toListCS                = ((Char, Char) -> String) -> CharSet -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\ (Char
lb, Char
ub) -> [Char
lb..Char
ub])

unionCS                 :: CharSet -> CharSet -> CharSet

unionCS :: CharSet -> CharSet -> CharSet
unionCS [] CharSet
s2           = CharSet
s2
unionCS CharSet
s1 []           = CharSet
s1

unionCS s1 :: CharSet
s1@((Char
l1,Char
u1):CharSet
s1') s2 :: CharSet
s2@((Char
l2,Char
u2):CharSet
s2')
    | Char
l1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<  Char
l2          = Char -> Char -> CharSet -> CharSet
forall {t}. (Ord t, Enum t) => t -> t -> [(t, t)] -> [(t, t)]
union Char
l1 Char
u1            (CharSet -> CharSet) -> CharSet -> CharSet
forall a b. (a -> b) -> a -> b
$ CharSet -> CharSet -> CharSet
unionCS CharSet
s1' CharSet
s2
    | Char
l1 Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
l2          = Char -> Char -> CharSet -> CharSet
forall {t}. (Ord t, Enum t) => t -> t -> [(t, t)] -> [(t, t)]
union Char
l1 (Char
u1 Char -> Char -> Char
forall a. Ord a => a -> a -> a
`max` Char
u2) (CharSet -> CharSet) -> CharSet -> CharSet
forall a b. (a -> b) -> a -> b
$ CharSet -> CharSet -> CharSet
unionCS CharSet
s1' CharSet
s2'
    | Bool
otherwise         = Char -> Char -> CharSet -> CharSet
forall {t}. (Ord t, Enum t) => t -> t -> [(t, t)] -> [(t, t)]
union Char
l2 Char
u2            (CharSet -> CharSet) -> CharSet -> CharSet
forall a b. (a -> b) -> a -> b
$ CharSet -> CharSet -> CharSet
unionCS CharSet
s1  CharSet
s2'
    -- l1 >  l2
    where
    union :: t -> t -> [(t, t)] -> [(t, t)]
union t
l t
u []        = [(t
l,t
u)]
    union t
l t
u s :: [(t, t)]
s@((t
l', t
u') : [(t, t)]
s')
        | t
u t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t -> t
forall a. Enum a => a -> a
pred t
l'   = (t
l,t
u) (t, t) -> [(t, t)] -> [(t, t)]
forall a. a -> [a] -> [a]
: [(t, t)]
s
        | Bool
otherwise     = t -> t -> [(t, t)] -> [(t, t)]
union t
l (t
u t -> t -> t
forall a. Ord a => a -> a -> a
`max` t
u') [(t, t)]
s'

diffCS                  :: CharSet -> CharSet -> CharSet

diffCS :: CharSet -> CharSet -> CharSet
diffCS [] CharSet
_             = []
diffCS CharSet
s  []            = CharSet
s

diffCS s1 :: CharSet
s1@((Char
l1,Char
u1):CharSet
s1') s2 :: CharSet
s2@((Char
l2,Char
u2):CharSet
s2')
    | Char
u1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l2           = (Char
l1,Char
u1) (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet -> CharSet -> CharSet
diffCS CharSet
s1' CharSet
s2       -- whole intervall remains in set
    | Char
u2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l1           = CharSet -> CharSet -> CharSet
diffCS CharSet
s1  CharSet
s2'                -- no elements to remove
                                                        -- intervalls overlap
    | Bool
otherwise         = CharSet
p CharSet -> CharSet -> CharSet
forall a. [a] -> [a] -> [a]
++ CharSet -> CharSet -> CharSet
diffCS (CharSet
s CharSet -> CharSet -> CharSet
forall a. [a] -> [a] -> [a]
++ CharSet
s1') CharSet
s2
    where
    p :: CharSet
p | Char
l1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l2         = [(Char
l1, Char -> Char
forall a. Enum a => a -> a
pred Char
l2)]               -- = rangeCS l1 (pred l2), but prevent underflow
      | Bool
otherwise       = []
    s :: CharSet
s | Char
u2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
u1         = [(Char -> Char
forall a. Enum a => a -> a
succ Char
u2, Char
u1)]               -- = rangeCS (succ u2) u1, but prevent overflow
      | Bool
otherwise       = []

compCS                  :: CharSet -> CharSet
compCS :: CharSet -> CharSet
compCS                  = (CharSet
allCS CharSet -> CharSet -> CharSet
`diffCS`)

intersectCS             :: CharSet -> CharSet -> CharSet

intersectCS :: CharSet -> CharSet -> CharSet
intersectCS []  CharSet
_s2     = []
intersectCS CharSet
_s1  []     = []

intersectCS s1 :: CharSet
s1@((Char
l1,Char
u1):CharSet
s1') s2 :: CharSet
s2@((Char
l2,Char
u2):CharSet
s2')
    | Char
u1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l2           = CharSet -> CharSet -> CharSet
intersectCS CharSet
s1' CharSet
s2
    | Char
u2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l1           = CharSet -> CharSet -> CharSet
intersectCS CharSet
s1  CharSet
s2'
                                                        -- intervalls overlap
    | Bool
otherwise         = (Char, Char)
i (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet
isect
    where
    i :: (Char, Char)
i                   = (Char
l1 Char -> Char -> Char
forall a. Ord a => a -> a -> a
`max` Char
l2, Char
u1 Char -> Char -> Char
forall a. Ord a => a -> a -> a
`min` Char
u2)
    isect :: CharSet
isect
        | Char
u1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
u2       = CharSet -> CharSet -> CharSet
intersectCS CharSet
s1' CharSet
s2
        | Bool
otherwise     = CharSet -> CharSet -> CharSet
intersectCS CharSet
s1  CharSet
s2'

exorCS                  :: CharSet -> CharSet -> CharSet

exorCS :: CharSet -> CharSet -> CharSet
exorCS [] CharSet
s2            = CharSet
s2
exorCS CharSet
s1 []            = CharSet
s1

exorCS s1 :: CharSet
s1@(i1 :: (Char, Char)
i1@(Char
l1,Char
u1):CharSet
s1') s2 :: CharSet
s2@(i2 :: (Char, Char)
i2@(Char
l2,Char
u2):CharSet
s2')
    | Char
u1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l2           = (Char, Char)
i1 (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet -> CharSet -> CharSet
exorCS CharSet
s1' CharSet
s2
    | Char
u2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l1           = (Char, Char)
i2 (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet -> CharSet -> CharSet
exorCS CharSet
s1  CharSet
s2'
                                                        -- intervalls overlap
    | Bool
otherwise         = CharSet
i CharSet -> CharSet -> CharSet
forall a. [a] -> [a] -> [a]
++ CharSet
exor'
    where
    i :: CharSet
i   | Char
l1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l2       = [(Char
l1, Char -> Char
forall a. Enum a => a -> a
pred Char
l2)]
        | Char
l2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
l1       = [(Char
l2, Char -> Char
forall a. Enum a => a -> a
pred Char
l1)]
        | Bool
otherwise     = []
    exor' :: CharSet
exor'
        | Char
u1 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
u2       = CharSet -> CharSet -> CharSet
exorCS                  CharSet
s1' ((Char -> Char
forall a. Enum a => a -> a
succ Char
u1, Char
u2) (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet
s2')
        | Char
u2 Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
u1       = CharSet -> CharSet -> CharSet
exorCS ((Char -> Char
forall a. Enum a => a -> a
succ Char
u2, Char
u1) (Char, Char) -> CharSet -> CharSet
forall a. a -> [a] -> [a]
: CharSet
s1')                 CharSet
s2'
        | Bool
otherwise     = CharSet -> CharSet -> CharSet
exorCS                  CharSet
s1'                  CharSet
s2'

-- ------------------------------------------------------------