{-# OPTIONS_GHC -fspec-constr #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
{-# LANGUAGE Trustworthy #-}
#endif
-----------------------------------------------------------------------------
-- |
-- Module      :  Data.CharSet
-- Copyright   :  (c) Edward Kmett 2010-2011
-- License     :  BSD3
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  experimental
-- Portability :  portable
--
-- A CharSet is an /efficient/ representation of a set of 'Char' values
-- designed for fast membership tests.
--
-- As an example @build isAlpha@ will create a set of alphabetic characters.
-- We can then use 'member' on the generated set to /efficiently/ test if a
-- given @Char@ represents an alphabetic character.
--
-- Designed to be imported qualified:
--
-- > import Data.CharSet (CharSet)
-- > import qualified Data.CharSet as CharSet
--
-------------------------------------------------------------------------------

module Data.CharSet
    (
    -- * Set type
      CharSet(..)
    -- * Operators
    , (\\)
    -- * Query
    , null
    , size
    , member
    , notMember
    , overlaps, isSubsetOf
    , isComplemented
    -- * Construction
    , build
    , empty
    , singleton
    , full
    , insert
    , delete
    , complement
    , range
    -- * Combine
    , union
    , intersection
    , difference
    -- * Filter
    , filter
    , partition
    -- * Map
    , map
    -- * Fold
    , fold
    -- * Conversion
    -- ** List
    , toList
    , fromList
    -- ** Ordered list
    , toAscList
    , fromAscList
    , fromDistinctAscList
    -- ** IntMaps
    , fromCharSet
    , toCharSet
    -- ** Array
    , toArray
    ) where

#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 608
import Data.String (IsString(..))
-- <<< -XOverloadedStrings >>> was introduced by GHC 6.8.1
#endif

import Data.Array.Unboxed hiding (range)
import Data.Data
import Data.Function (on)
import Data.IntSet (IntSet)
import Data.CharSet.ByteSet (ByteSet)
import qualified Data.CharSet.ByteSet as ByteSet
import Data.Bits hiding (complement)
import Data.Word
import Data.ByteString.Internal (c2w)
import Data.Semigroup
import qualified Data.IntSet as I
import qualified Data.List as L
import Prelude hiding (filter, map, null)
import qualified Prelude as P
import Text.Read

-- | Stored as a (possibly negated) IntSet and a fast set used for the head byte.
--
-- The set of valid (possibly negated) head bytes is stored unboxed as a 32-byte
-- bytestring-based lookup table.
data CharSet = CharSet
    !Bool    -- Whether ByteSet and IntSet are negated
    !ByteSet -- Set of head bytes, unboxed
    !IntSet  -- Set of characters in the charset
  deriving Typeable

#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 608
-- | @= CharSet.`fromList`@
instance IsString CharSet where
  fromString :: String -> CharSet
fromString = String -> CharSet
fromList
#endif

charSet :: Bool -> IntSet -> CharSet
charSet :: Bool -> IntSet -> CharSet
charSet Bool
b IntSet
s = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
b ([Word8] -> ByteSet
ByteSet.fromList ((Int -> Word8) -> [Int] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> Word8
headByte (IntSet -> [Int]
I.toAscList IntSet
s))) IntSet
s

headByte :: Int -> Word8
headByte :: Int -> Word8
headByte Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7f   = Int -> Word8
forall a. Enum a => Int -> a
toEnum Int
i
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x7ff  = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)
  | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xffff = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int
0xe0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12)
  | Bool
otherwise   = Int -> Word8
forall a. Enum a => Int -> a
toEnum (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ Int
0xf0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
i Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
18)

pos :: IntSet -> CharSet
pos :: IntSet -> CharSet
pos = Bool -> IntSet -> CharSet
charSet Bool
True

neg :: IntSet -> CharSet
neg :: IntSet -> CharSet
neg = Bool -> IntSet -> CharSet
charSet Bool
False

(\\) :: CharSet -> CharSet -> CharSet
\\ :: CharSet -> CharSet -> CharSet
(\\) = CharSet -> CharSet -> CharSet
difference

-- | Applies a predicate across the whole range of possible character values
-- to create a set of only those characters which satisfy the predicate.
--
-- As an example @build isAlpha@ will generate a CharSet of all
-- alphabetic characters.
build :: (Char -> Bool) -> CharSet
build :: (Char -> Bool) -> CharSet
build Char -> Bool
p = String -> CharSet
fromDistinctAscList (String -> CharSet) -> String -> CharSet
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter Char -> Bool
p [Char
forall a. Bounded a => a
minBound .. Char
forall a. Bounded a => a
maxBound]
{-# INLINE build #-}

map :: (Char -> Char) -> CharSet -> CharSet
map :: (Char -> Char) -> CharSet -> CharSet
map Char -> Char
f (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> CharSet
pos ((Int -> Int) -> IntSet -> IntSet
I.map (Char -> Int
forall a. Enum a => a -> Int
fromEnum (Char -> Int) -> (Int -> Char) -> Int -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Char
f (Char -> Char) -> (Int -> Char) -> Int -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i)
map Char -> Char
f (CharSet Bool
False ByteSet
_ IntSet
i) = String -> CharSet
fromList (String -> CharSet) -> String -> CharSet
forall a b. (a -> b) -> a -> b
$ (Char -> Char) -> String -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Char
f (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE map #-}

isComplemented :: CharSet -> Bool
isComplemented :: CharSet -> Bool
isComplemented (CharSet Bool
True ByteSet
_ IntSet
_) = Bool
False
isComplemented (CharSet Bool
False ByteSet
_ IntSet
_) = Bool
True
{-# INLINE isComplemented #-}

toList :: CharSet -> String
toList :: CharSet -> String
toList (CharSet Bool
True ByteSet
_ IntSet
i) = (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Int -> Char
forall a. Enum a => Int -> a
toEnum (IntSet -> [Int]
I.toList IntSet
i)
toList (CharSet Bool
False ByteSet
_ IntSet
i) = (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE toList #-}

toAscList :: CharSet -> String
toAscList :: CharSet -> String
toAscList (CharSet Bool
True ByteSet
_ IntSet
i) = (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
P.map Int -> Char
forall a. Enum a => Int -> a
toEnum (IntSet -> [Int]
I.toAscList IntSet
i)
toAscList (CharSet Bool
False ByteSet
_ IntSet
i) = (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE toAscList #-}

empty :: CharSet
empty :: CharSet
empty = IntSet -> CharSet
pos IntSet
I.empty

singleton :: Char -> CharSet
singleton :: Char -> CharSet
singleton = IntSet -> CharSet
pos (IntSet -> CharSet) -> (Char -> IntSet) -> Char -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> IntSet
I.singleton (Int -> IntSet) -> (Char -> Int) -> Char -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE singleton #-}

full :: CharSet
full :: CharSet
full = IntSet -> CharSet
neg IntSet
I.empty

-- | /O(n)/ worst case
null :: CharSet -> Bool
null :: CharSet -> Bool
null (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> Bool
I.null IntSet
i
null (CharSet Bool
False ByteSet
_ IntSet
i) = IntSet -> Int
I.size IntSet
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
numChars
{-# INLINE null #-}

-- | /O(n)/
size :: CharSet -> Int
size :: CharSet -> Int
size (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> Int
I.size IntSet
i
size (CharSet Bool
False ByteSet
_ IntSet
i) = Int
numChars Int -> Int -> Int
forall a. Num a => a -> a -> a
- IntSet -> Int
I.size IntSet
i
{-# INLINE size #-}

insert :: Char -> CharSet -> CharSet
insert :: Char -> CharSet -> CharSet
insert Char
c (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> CharSet
pos (Int -> IntSet -> IntSet
I.insert (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
insert Char
c (CharSet Bool
False ByteSet
_ IntSet
i) = IntSet -> CharSet
neg (Int -> IntSet -> IntSet
I.delete (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
{-# INLINE insert #-}

range :: Char -> Char -> CharSet
range :: Char -> Char -> CharSet
range Char
a Char
b
  | Char
a Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
b = String -> CharSet
fromDistinctAscList [Char
a..Char
b]
  | Bool
otherwise = CharSet
empty

delete :: Char -> CharSet -> CharSet
delete :: Char -> CharSet -> CharSet
delete Char
c (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> CharSet
pos (Int -> IntSet -> IntSet
I.delete (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
delete Char
c (CharSet Bool
False ByteSet
_ IntSet
i) = IntSet -> CharSet
neg (Int -> IntSet -> IntSet
I.insert (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i)
{-# INLINE delete #-}

complement :: CharSet -> CharSet
complement :: CharSet -> CharSet
complement (CharSet Bool
True ByteSet
s IntSet
i) = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
False ByteSet
s IntSet
i
complement (CharSet Bool
False ByteSet
s IntSet
i) = Bool -> ByteSet -> IntSet -> CharSet
CharSet Bool
True ByteSet
s IntSet
i
{-# INLINE complement #-}

union :: CharSet -> CharSet -> CharSet
union :: CharSet -> CharSet -> CharSet
union (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
union (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
union (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
union (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
{-# INLINE union #-}

intersection :: CharSet -> CharSet -> CharSet
intersection :: CharSet -> CharSet -> CharSet
intersection (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
intersection (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
intersection (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
intersection (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
{-# INLINE intersection #-}

difference :: CharSet -> CharSet -> CharSet
difference :: CharSet -> CharSet -> CharSet
difference (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
i IntSet
j)
difference (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
difference (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> CharSet
neg (IntSet -> IntSet -> IntSet
I.union IntSet
i IntSet
j)
difference (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> CharSet
pos (IntSet -> IntSet -> IntSet
I.difference IntSet
j IntSet
i)
{-# INLINE difference #-}

member :: Char -> CharSet -> Bool
member :: Char -> CharSet -> Bool
member Char
c (CharSet Bool
True ByteSet
b IntSet
i)
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x7f = Word8 -> ByteSet -> Bool
ByteSet.member (Char -> Word8
c2w Char
c) ByteSet
b
  | Bool
otherwise        = Int -> IntSet -> Bool
I.member (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i
member Char
c (CharSet Bool
False ByteSet
b IntSet
i)
  | Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Int -> Char
forall a. Enum a => Int -> a
toEnum Int
0x7f = Bool -> Bool
not (Word8 -> ByteSet -> Bool
ByteSet.member (Char -> Word8
c2w Char
c) ByteSet
b)
  | Bool
otherwise        = Int -> IntSet -> Bool
I.notMember (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) IntSet
i
{-# INLINE member #-}

notMember :: Char -> CharSet -> Bool
notMember :: Char -> CharSet -> Bool
notMember Char
c CharSet
s = Bool -> Bool
not (Char -> CharSet -> Bool
member Char
c CharSet
s)
{-# INLINE notMember #-}

fold :: (Char -> b -> b) -> b -> CharSet -> b
fold :: forall b. (Char -> b -> b) -> b -> CharSet -> b
fold Char -> b -> b
f b
z (CharSet Bool
True ByteSet
_ IntSet
i) = (Int -> b -> b) -> b -> IntSet -> b
forall b. (Int -> b -> b) -> b -> IntSet -> b
I.fold (Char -> b -> b
f (Char -> b -> b) -> (Int -> Char) -> Int -> b -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) b
z IntSet
i
fold Char -> b -> b
f b
z (CharSet Bool
False ByteSet
_ IntSet
i) = (Char -> b -> b) -> b -> String -> b
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Char -> b -> b
f b
z (String -> b) -> String -> b
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Char
x -> Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Char
ul..Char
uh]
{-# INLINE fold #-}

filter :: (Char -> Bool) -> CharSet -> CharSet
filter :: (Char -> Bool) -> CharSet -> CharSet
filter Char -> Bool
p (CharSet Bool
True ByteSet
_ IntSet
i) = IntSet -> CharSet
pos ((Int -> Bool) -> IntSet -> IntSet
I.filter (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i)
filter Char -> Bool
p (CharSet Bool
False ByteSet
_ IntSet
i) = IntSet -> CharSet
neg (IntSet -> CharSet) -> IntSet -> CharSet
forall a b. (a -> b) -> a -> b
$ (Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (Int -> IntSet -> IntSet
I.insert) IntSet
i ([Int] -> IntSet) -> [Int] -> IntSet
forall a b. (a -> b) -> a -> b
$ (Int -> Bool) -> [Int] -> [Int]
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Int
x -> (Int
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) Bool -> Bool -> Bool
&& Bool -> Bool
not (Char -> Bool
p (Int -> Char
forall a. Enum a => Int -> a
toEnum Int
x))) [Int
ol..Int
oh]
{-# INLINE filter #-}

partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition :: (Char -> Bool) -> CharSet -> (CharSet, CharSet)
partition Char -> Bool
p (CharSet Bool
True ByteSet
_ IntSet
i) = (IntSet -> CharSet
pos IntSet
l, IntSet -> CharSet
pos IntSet
r)
    where (IntSet
l,IntSet
r) = (Int -> Bool) -> IntSet -> (IntSet, IntSet)
I.partition (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) IntSet
i
partition Char -> Bool
p (CharSet Bool
False ByteSet
_ IntSet
i) = (IntSet -> CharSet
neg ((Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> IntSet -> IntSet
I.insert IntSet
i [Int]
l), IntSet -> CharSet
neg ((Int -> IntSet -> IntSet) -> IntSet -> [Int] -> IntSet
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Int -> IntSet -> IntSet
I.insert IntSet
i [Int]
r))
    where ([Int]
l,[Int]
r) = (Int -> Bool) -> [Int] -> ([Int], [Int])
forall a. (a -> Bool) -> [a] -> ([a], [a])
L.partition (Char -> Bool
p (Char -> Bool) -> (Int -> Char) -> Int -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum) ([Int] -> ([Int], [Int])) -> [Int] -> ([Int], [Int])
forall a b. (a -> b) -> a -> b
$ (Int -> Bool) -> [Int] -> [Int]
forall a. (a -> Bool) -> [a] -> [a]
P.filter (\Int
x -> Int
x Int -> IntSet -> Bool
`I.notMember` IntSet
i) [Int
ol..Int
oh]
{-# INLINE partition #-}

overlaps :: CharSet -> CharSet -> Bool
overlaps :: CharSet -> CharSet -> Bool
overlaps (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = Bool -> Bool
not (IntSet -> Bool
I.null (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j))
overlaps (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = Bool -> Bool
not (IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
j IntSet
i)
overlaps (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = Bool -> Bool
not (IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
i IntSet
j)
overlaps (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\Int
x -> Int -> IntSet -> Bool
I.notMember Int
x IntSet
i Bool -> Bool -> Bool
&& Int -> IntSet -> Bool
I.notMember Int
x IntSet
j) [Int
ol..Int
oh] -- not likely
{-# INLINE overlaps #-}

isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf :: CharSet -> CharSet -> Bool
isSubsetOf (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
i IntSet
j
isSubsetOf (CharSet Bool
True ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> Bool
I.null (IntSet -> IntSet -> IntSet
I.intersection IntSet
i IntSet
j)
isSubsetOf (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
True ByteSet
_ IntSet
j) = (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (\Int
x -> Int -> IntSet -> Bool
I.member Int
x IntSet
i Bool -> Bool -> Bool
&& Int -> IntSet -> Bool
I.member Int
x IntSet
j) [Int
ol..Int
oh] -- not bloody likely
isSubsetOf (CharSet Bool
False ByteSet
_ IntSet
i) (CharSet Bool
False ByteSet
_ IntSet
j) = IntSet -> IntSet -> Bool
I.isSubsetOf IntSet
j IntSet
i
{-# INLINE isSubsetOf #-}

fromList :: String -> CharSet
fromList :: String -> CharSet
fromList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromList #-}

fromAscList :: String -> CharSet
fromAscList :: String -> CharSet
fromAscList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromAscList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromAscList #-}

fromDistinctAscList :: String -> CharSet
fromDistinctAscList :: String -> CharSet
fromDistinctAscList = IntSet -> CharSet
pos (IntSet -> CharSet) -> (String -> IntSet) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> IntSet
I.fromDistinctAscList ([Int] -> IntSet) -> (String -> [Int]) -> String -> IntSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Int) -> String -> [Int]
forall a b. (a -> b) -> [a] -> [b]
P.map Char -> Int
forall a. Enum a => a -> Int
fromEnum
{-# INLINE fromDistinctAscList #-}

-- isProperSubsetOf :: CharSet -> CharSet -> Bool
-- isProperSubsetOf (P i) (P j) = I.isProperSubsetOf i j
-- isProperSubsetOf (P i) (N j) = null (I.intersection i j) && ...
-- isProperSubsetOf (N i) (N j) = I.isProperSubsetOf j i

ul, uh :: Char
ul :: Char
ul = Char
forall a. Bounded a => a
minBound
uh :: Char
uh = Char
forall a. Bounded a => a
maxBound
{-# INLINE ul #-}
{-# INLINE uh #-}

ol, oh :: Int
ol :: Int
ol = Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
ul
oh :: Int
oh = Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
uh
{-# INLINE ol #-}
{-# INLINE oh #-}

numChars :: Int
numChars :: Int
numChars = Int
oh Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ol Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
{-# INLINE numChars #-}

instance Data CharSet where
  gfoldl :: forall (c :: * -> *).
(forall d b. Data d => c (d -> b) -> d -> c b)
-> (forall g. g -> c g) -> CharSet -> c CharSet
gfoldl forall d b. Data d => c (d -> b) -> d -> c b
k forall g. g -> c g
z CharSet
set
    | CharSet -> Bool
isComplemented CharSet
set = (CharSet -> CharSet) -> c (CharSet -> CharSet)
forall g. g -> c g
z CharSet -> CharSet
complement c (CharSet -> CharSet) -> CharSet -> c CharSet
forall d b. Data d => c (d -> b) -> d -> c b
`k` CharSet -> CharSet
complement CharSet
set
    | Bool
otherwise          = (String -> CharSet) -> c (String -> CharSet)
forall g. g -> c g
z String -> CharSet
fromList c (String -> CharSet) -> String -> c CharSet
forall d b. Data d => c (d -> b) -> d -> c b
`k` CharSet -> String
toList CharSet
set

  toConstr :: CharSet -> Constr
toConstr CharSet
set
    | CharSet -> Bool
isComplemented CharSet
set = Constr
complementConstr
    | Bool
otherwise = Constr
fromListConstr

  dataTypeOf :: CharSet -> DataType
dataTypeOf CharSet
_ = DataType
charSetDataType

  gunfold :: forall (c :: * -> *).
(forall b r. Data b => c (b -> r) -> c r)
-> (forall r. r -> c r) -> Constr -> c CharSet
gunfold forall b r. Data b => c (b -> r) -> c r
k forall r. r -> c r
z Constr
c = case Constr -> Int
constrIndex Constr
c of
    Int
1 -> c (String -> CharSet) -> c CharSet
forall b r. Data b => c (b -> r) -> c r
k ((String -> CharSet) -> c (String -> CharSet)
forall r. r -> c r
z String -> CharSet
fromList)
    Int
2 -> c (CharSet -> CharSet) -> c CharSet
forall b r. Data b => c (b -> r) -> c r
k ((CharSet -> CharSet) -> c (CharSet -> CharSet)
forall r. r -> c r
z CharSet -> CharSet
complement)
    Int
_ -> String -> c CharSet
forall a. HasCallStack => String -> a
error String
"gunfold"

fromListConstr :: Constr
fromListConstr :: Constr
fromListConstr   = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
charSetDataType String
"fromList" [] Fixity
Prefix
{-# NOINLINE fromListConstr #-}

complementConstr :: Constr
complementConstr :: Constr
complementConstr = DataType -> String -> [String] -> Fixity -> Constr
mkConstr DataType
charSetDataType String
"complement" [] Fixity
Prefix
{-# NOINLINE complementConstr #-}

charSetDataType :: DataType
charSetDataType :: DataType
charSetDataType  = String -> [Constr] -> DataType
mkDataType String
"Data.CharSet.CharSet" [Constr
fromListConstr, Constr
complementConstr]
{-# NOINLINE charSetDataType #-}

-- returns an intset and if the charSet is positive
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet :: CharSet -> (Bool, IntSet)
fromCharSet (CharSet Bool
b ByteSet
_ IntSet
i) = (Bool
b, IntSet
i)
{-# INLINE fromCharSet #-}

toCharSet :: IntSet -> CharSet
toCharSet :: IntSet -> CharSet
toCharSet = IntSet -> CharSet
pos
{-# INLINE toCharSet #-}

instance Eq CharSet where
  == :: CharSet -> CharSet -> Bool
(==) = String -> String -> Bool
forall a. Eq a => a -> a -> Bool
(==) (String -> String -> Bool)
-> (CharSet -> String) -> CharSet -> CharSet -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` CharSet -> String
toAscList

instance Ord CharSet where
  compare :: CharSet -> CharSet -> Ordering
compare = String -> String -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (String -> String -> Ordering)
-> (CharSet -> String) -> CharSet -> CharSet -> Ordering
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` CharSet -> String
toAscList

instance Bounded CharSet where
  minBound :: CharSet
minBound = CharSet
empty
  maxBound :: CharSet
maxBound = CharSet
full

-- TODO return a tighter bounded array perhaps starting from the least element present to the last element present?
toArray :: CharSet -> UArray Char Bool
toArray :: CharSet -> UArray Char Bool
toArray CharSet
set = (Char, Char) -> [(Char, Bool)] -> UArray Char Bool
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (Char
forall a. Bounded a => a
minBound, Char
forall a. Bounded a => a
maxBound) ([(Char, Bool)] -> UArray Char Bool)
-> [(Char, Bool)] -> UArray Char Bool
forall a b. (a -> b) -> a -> b
$ (Char -> (Char, Bool)) -> String -> [(Char, Bool)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\Char
x -> (Char
x, Char
x Char -> CharSet -> Bool
`member` CharSet
set)) [Char
forall a. Bounded a => a
minBound .. Char
forall a. Bounded a => a
maxBound]

instance Show CharSet where
  showsPrec :: Int -> CharSet -> String -> String
showsPrec Int
d CharSet
i
    | CharSet -> Bool
isComplemented CharSet
i = Bool -> (String -> String) -> String -> String
showParen (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10) ((String -> String) -> String -> String)
-> (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String -> String -> String
showString String
"complement " (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> CharSet -> String -> String
forall a. Show a => Int -> a -> String -> String
showsPrec Int
11 (CharSet -> CharSet
complement CharSet
i)
    | Bool
otherwise        = Bool -> (String -> String) -> String -> String
showParen (Int
d Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
10) ((String -> String) -> String -> String)
-> (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String -> String -> String
showString String
"fromDistinctAscList " (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String -> String
forall a. Show a => Int -> a -> String -> String
showsPrec Int
11 (CharSet -> String
toAscList CharSet
i)

instance Read CharSet where
  readPrec :: ReadPrec CharSet
readPrec = ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a
parens (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ ReadPrec CharSet
complemented ReadPrec CharSet -> ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a -> ReadPrec a
+++ ReadPrec CharSet
normal
    where
      complemented :: ReadPrec CharSet
complemented = Int -> ReadPrec CharSet -> ReadPrec CharSet
forall a. Int -> ReadPrec a -> ReadPrec a
prec Int
10 (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ do
        Ident String
"complement" <- ReadPrec Lexeme
lexP
        CharSet -> CharSet
complement (CharSet -> CharSet) -> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> ReadPrec a -> ReadPrec b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ReadPrec CharSet -> ReadPrec CharSet
forall a. ReadPrec a -> ReadPrec a
step ReadPrec CharSet
forall a. Read a => ReadPrec a
readPrec
      normal :: ReadPrec CharSet
normal = Int -> ReadPrec CharSet -> ReadPrec CharSet
forall a. Int -> ReadPrec a -> ReadPrec a
prec Int
10 (ReadPrec CharSet -> ReadPrec CharSet)
-> ReadPrec CharSet -> ReadPrec CharSet
forall a b. (a -> b) -> a -> b
$ do
        Ident String
"fromDistinctAscList" <- ReadPrec Lexeme
lexP
        String -> CharSet
fromDistinctAscList (String -> CharSet) -> ReadPrec String -> ReadPrec CharSet
forall a b. (a -> b) -> ReadPrec a -> ReadPrec b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` ReadPrec String -> ReadPrec String
forall a. ReadPrec a -> ReadPrec a
step ReadPrec String
forall a. Read a => ReadPrec a
readPrec

instance Semigroup CharSet where
  <> :: CharSet -> CharSet -> CharSet
(<>) = CharSet -> CharSet -> CharSet
union

instance Monoid CharSet where
  mempty :: CharSet
mempty = CharSet
empty
#if !(MIN_VERSION_base(4,11,0))
  mappend = union
#endif