-- ------------------------------------------------------------

{- |
   Module     : Data.Char.Properties.XMLCharProps
   Copyright  : Copyright (C) 2010 - Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   XML character properties

-}

-- ------------------------------------------------------------

module Data.Char.Properties.XMLCharProps
    ( isXmlChar
    , isXmlCharCR
    , isXml1ByteChar
    , isXmlLatin1Char
    , isXmlSpaceChar
    , isXmlSpaceCharCR
    , isXml11SpaceChar
    , isXmlNameChar
    , isXmlNameStartChar
    , isXmlNCNameChar
    , isXmlNCNameStartChar
    , isXmlPubidChar
    , isXmlLetter
    , isXmlBaseChar
    , isXmlIdeographicChar
    , isXmlCombiningChar
    , isXmlDigit
    , isXmlExtender
    , isXmlControlOrPermanentlyUndefined

    , charPropXmlChar
    , charPropXmlCharCR
    , charPropXml1ByteChar
    , charPropXmlLatin1Char
    , charPropXmlSpaceChar
    , charPropXmlSpaceCharCR
    , charPropXml11SpaceChar
    , charPropXmlNameChar
    , charPropXmlNameStartChar
    , charPropXmlNCNameChar
    , charPropXmlNCNameStartChar
    , charPropXmlPubidChar
    , charPropXmlLetter
    , charPropXmlBaseChar
    , charPropXmlIdeographicChar
    , charPropXmlCombiningChar
    , charPropXmlDigit
    , charPropXmlExtender
    , charPropXmlControlOrPermanentlyUndefined
    )
where

import Data.Set.CharSet

-- ------------------------------------------------------------

-- |
-- Test whether a character is a legal XML character.
--
-- Accepts tab, line feed, carriage return and the ranges
-- U+0020..U+D7FF, U+E000..U+FFFD and U+10000..U+10FFFF.
-- This is the same set as 'charPropXmlChar', but checked with plain
-- comparisons instead of a set lookup.

isXmlChar               :: Char -> Bool
isXmlChar c
    = ( c >= ' ' && c <= '\xD7FF' )
      ||
      c `elem` ['\n', '\t', '\r']
      ||
      ( c >= '\xE000'
        &&
        ( c <= '\xFFFD'
          ||
          ( c >= '\x10000' && c <= '\x10FFFF' )
        )
      )
{-# INLINE isXmlChar #-}

-- |
-- The legal XML characters, written as ascending (lower, upper) ranges.
-- A pair with equal bounds denotes a single character.

charPropXmlChar         :: CharSet
charPropXmlChar
    = [ ('\x0009', '\x000A')
      , ('\x000D', '\x000D')
      , ('\x0020', '\xD7FF')
      , ('\xE000', '\xFFFD')
      , ('\x10000', '\x10FFFF')
      ]

-- |
-- Test whether a character is a legal XML character other than
-- carriage return.
--
-- Accepts tab, line feed and the ranges U+0020..U+D7FF,
-- U+E000..U+FFFD and U+10000..U+10FFFF.
-- This is the same set as 'charPropXmlCharCR', but checked with plain
-- comparisons instead of a set lookup.

isXmlCharCR             :: Char -> Bool
isXmlCharCR c
    = ( c >= ' ' && c <= '\xD7FF' )
      ||
      c `elem` ['\n', '\t']
      ||
      ( c >= '\xE000'
        &&
        ( c <= '\xFFFD'
          ||
          ( c >= '\x10000' && c <= '\x10FFFF' )
        )
      )
{-# INLINE isXmlCharCR #-}

-- |
-- The legal XML characters without carriage return (U+000D),
-- written as ascending (lower, upper) ranges.

charPropXmlCharCR       :: CharSet
charPropXmlCharCR
    = [ ('\x0009', '\x000A')
      , ('\x0020', '\xD7FF')
      , ('\xE000', '\xFFFD')
      , ('\x10000', '\x10FFFF')
      ]

-- |
-- Test whether a character is a legal 1 byte XML character,
-- i.e. a member of 'charPropXml1ByteChar'.

isXml1ByteChar          :: Char -> Bool
isXml1ByteChar c        = c `elemCS` charPropXml1ByteChar
{-# INLINE isXml1ByteChar #-}

-- |
-- The legal 1 byte XML characters: tab, line feed, carriage return
-- and the range U+0020..U+007F.

charPropXml1ByteChar    :: CharSet
charPropXml1ByteChar
    = stringCS ['\x09', '\x0A', '\x0D']
      `unionCS`
      [ ('\x20', '\x7F') ]

-- |
-- Test whether a character is a legal Latin-1 XML character,
-- i.e. a member of 'charPropXmlLatin1Char'.

isXmlLatin1Char         :: Char -> Bool
isXmlLatin1Char c       = c `elemCS` charPropXmlLatin1Char
{-# INLINE isXmlLatin1Char #-}

-- |
-- The legal Latin-1 XML characters: everything in
-- 'charPropXml1ByteChar' plus the range U+0080..U+00FF.

charPropXmlLatin1Char   :: CharSet
charPropXmlLatin1Char
    = charPropXml1ByteChar
      `unionCS`
      [ ('\x80', '\xFF') ]

-- |
-- Test for an XML space character: blank, line feed, tab or
-- carriage return.
--
-- This is the same set as 'charPropXmlSpaceChar', but checked with
-- plain comparisons instead of a set lookup.

isXmlSpaceChar          :: Char -> Bool
isXmlSpaceChar c
    = c == ' '
      ||
      c == '\n'
      ||
      c == '\t'
      ||
      c == '\r'
{-# INLINE isXmlSpaceChar #-}

-- |
-- The XML space characters: blank, tab, carriage return and line feed.

charPropXmlSpaceChar    :: CharSet
charPropXmlSpaceChar
    = stringCS ['\x20', '\x09', '\x0D', '\x0A']

-- |
-- Test for an XML space character other than carriage return:
-- blank, line feed or tab.
--
-- This is the same set as 'charPropXmlSpaceCharCR', but checked with
-- plain comparisons instead of a set lookup.

isXmlSpaceCharCR        :: Char -> Bool
isXmlSpaceCharCR c
    = c == ' '
      ||
      c == '\n'
      ||
      c == '\t'
{-# INLINE isXmlSpaceCharCR #-}

-- |
-- The XML space characters without carriage return:
-- blank, tab and line feed.

charPropXmlSpaceCharCR  :: CharSet
charPropXmlSpaceCharCR
    = stringCS ['\x20', '\x09', '\x0A']

-- |
-- Test for an XML 1.1 space character: the XML space characters
-- plus the additional characters U+0085 and U+2028.
--
-- see also : 'isXmlSpaceChar'

isXml11SpaceChar        :: Char -> Bool
isXml11SpaceChar c      = c `elemCS` charPropXml11SpaceChar

-- |
-- The XML 1.1 space characters: tab, line feed, carriage return,
-- blank, U+0085 and U+2028.

charPropXml11SpaceChar  :: CharSet
charPropXml11SpaceChar
    = stringCS ['\x09', '\x0A', '\x0D', '\x20', '\x85', '\x2028']

-- |
-- Test whether a character may occur in an XML name.
--
-- Optimized for ASCII: characters up to @z@ are decided by direct
-- comparisons (ASCII letters, ASCII digits, @-@, @.@, @:@ and @_@).
-- Characters from U+00B7 upwards are looked up in
-- 'charPropXmlNameChar'. Everything in between is rejected.

isXmlNameChar           :: Char -> Bool
isXmlNameChar c
    | c <= 'z'
        = c >= 'a'
          ||
          ( c >= 'A' && c <= 'Z' )
          ||
          ( c >= '0' && c <= '9' )
          ||
          c `elem` ['-', '.', ':', '_']
    | c >= '\xB7'
        = c `elemCS` charPropXmlNameChar
    | otherwise
        = False
{-# INLINE isXmlNameChar #-}

-- |
-- The XML name characters: letters, digits, @-@, @.@, @:@, @_@,
-- combining characters and extenders.

charPropXmlNameChar     :: CharSet
charPropXmlNameChar
    = charPropXmlLetter
      `unionCS`
      charPropXmlDigit
      `unionCS`
      (singleCS '\x2D' `unionCS` singleCS '\x2E')       -- '-' | '.'
      `unionCS`
      (singleCS '\x3A' `unionCS` singleCS '\x5F')       -- ':' | '_'
      `unionCS`
      charPropXmlCombiningChar
      `unionCS`
      charPropXmlExtender

-- |
-- Test whether a character may start an XML name.
--
-- Optimized for ASCII: characters up to @z@ are decided by direct
-- comparisons (ASCII letters, @:@ and @_@). Characters from U+00C0
-- upwards are looked up in 'charPropXmlNameStartChar'. Everything in
-- between is rejected.
--
-- see also : 'isXmlNameChar'

isXmlNameStartChar      :: Char -> Bool
isXmlNameStartChar c
    | c <= 'z'
        = c >= 'a'
          ||
          ( c >= 'A' && c <= 'Z' )
          ||
          c `elem` [':', '_']
    | c >= '\xC0'
        = c `elemCS` charPropXmlNameStartChar
    | otherwise
        = False
{-# INLINE isXmlNameStartChar #-}

-- |
-- The XML name start characters: letters, @:@ and @_@.

charPropXmlNameStartChar        :: CharSet
charPropXmlNameStartChar
    = charPropXmlLetter
      `unionCS`
      singleCS '\x3A'                                   -- ':'
      `unionCS`
      singleCS '\x5F'                                   -- '_'

-- |
-- Test whether a character may occur in an XML NCName:
-- like 'isXmlNameChar', but no @:@ allowed.
--
-- Optimized for ASCII: characters up to @z@ are decided by direct
-- comparisons (ASCII letters, ASCII digits, @-@, @.@ and @_@).
-- Characters from U+00B7 upwards are looked up in
-- 'charPropXmlNameChar'. That set still contains @:@, but @:@ lies
-- below U+00B7 and so never reaches the lookup.
--
-- see also : 'isXmlNameChar'

isXmlNCNameChar         :: Char -> Bool
isXmlNCNameChar c
    | c <= 'z'
        = c >= 'a'
          ||
          ( c >= 'A' && c <= 'Z' )
          ||
          ( c >= '0' && c <= '9' )
          ||
          c `elem` ['-', '.', '_']
    | c >= '\xB7'
        = c `elemCS` charPropXmlNameChar
    | otherwise
        = False
{-# INLINE isXmlNCNameChar #-}

-- |
-- The XML NCName characters: 'charPropXmlNameChar' without @:@.

charPropXmlNCNameChar   :: CharSet
charPropXmlNCNameChar
    = charPropXmlNameChar
      `diffCS`
      singleCS '\x3A'                                   -- no ':'

-- |
-- Test whether a character may start an XML NCName:
-- like 'isXmlNameStartChar', but no @:@ allowed.
--
-- Optimized for ASCII: characters up to @z@ are decided by direct
-- comparisons (ASCII letters and @_@). Characters from U+00C0 upwards
-- are looked up in 'charPropXmlNameStartChar'. That set still
-- contains @:@, but @:@ lies below U+00C0 and so never reaches the
-- lookup.
--
-- see also : 'isXmlNameChar', 'isXmlNCNameChar'

isXmlNCNameStartChar    :: Char -> Bool
isXmlNCNameStartChar c
    | c <= 'z'
        = c >= 'a'
          ||
          ( c >= 'A' && c <= 'Z' )
          ||
          c == '_'
    | c >= '\xC0'
        = c `elemCS` charPropXmlNameStartChar
    | otherwise
        = False
{-# INLINE isXmlNCNameStartChar #-}

-- |
-- The XML NCName start characters:
-- 'charPropXmlNameStartChar' without @:@.

charPropXmlNCNameStartChar      :: CharSet
charPropXmlNCNameStartChar
    = charPropXmlNameStartChar
      `diffCS`
      singleCS '\x3A'                                   -- no ':'

-- |
-- Test whether a character may occur in an XML public id,
-- i.e. is a member of 'charPropXmlPubidChar'.

isXmlPubidChar          :: Char -> Bool
isXmlPubidChar c        = c `elemCS` charPropXmlPubidChar

-- |
-- The XML public id characters: ASCII digits, ASCII letters, blank,
-- carriage return, line feed and the punctuation characters listed
-- in the string below.

charPropXmlPubidChar    :: CharSet
charPropXmlPubidChar
    = rangeCS '0' '9'
      `unionCS`
      rangeCS 'A' 'Z'
      `unionCS`
      rangeCS 'a' 'z'
      `unionCS`
      stringCS " \r\n-'()+,./:=?;!*#@$_%"

-- |
-- Test whether a character is an XML letter,
-- i.e. a member of 'charPropXmlLetter'.

isXmlLetter             :: Char -> Bool
isXmlLetter c           = c `elemCS` charPropXmlLetter
{-# INLINE isXmlLetter #-}

-- |
-- The XML letters: base characters and ideographic characters.

charPropXmlLetter       :: CharSet
charPropXmlLetter
    = charPropXmlBaseChar
      `unionCS`
      charPropXmlIdeographicChar

-- |
-- Test whether a character is an XML base character,
-- i.e. a member of 'charPropXmlBaseChar'.

isXmlBaseChar           :: Char -> Bool
isXmlBaseChar c         = c `elemCS` charPropXmlBaseChar

-- |
-- The XML base characters, written as ascending (lower, upper) ranges.
-- A pair with equal bounds denotes a single character.

charPropXmlBaseChar     :: CharSet
charPropXmlBaseChar
    = [ ('\x0041', '\x005A')
      , ('\x0061', '\x007A')
      , ('\x00C0', '\x00D6')
      , ('\x00D8', '\x00F6')
      , ('\x00F8', '\x0131')
      , ('\x0134', '\x013E')
      , ('\x0141', '\x0148')
      , ('\x014A', '\x017E')
      , ('\x0180', '\x01C3')
      , ('\x01CD', '\x01F0')
      , ('\x01F4', '\x01F5')
      , ('\x01FA', '\x0217')
      , ('\x0250', '\x02A8')
      , ('\x02BB', '\x02C1')
      , ('\x0386', '\x0386')
      , ('\x0388', '\x038A')
      , ('\x038C', '\x038C')
      , ('\x038E', '\x03A1')
      , ('\x03A3', '\x03CE')
      , ('\x03D0', '\x03D6')
      , ('\x03DA', '\x03DA')
      , ('\x03DC', '\x03DC')
      , ('\x03DE', '\x03DE')
      , ('\x03E0', '\x03E0')
      , ('\x03E2', '\x03F3')
      , ('\x0401', '\x040C')
      , ('\x040E', '\x044F')
      , ('\x0451', '\x045C')
      , ('\x045E', '\x0481')
      , ('\x0490', '\x04C4')
      , ('\x04C7', '\x04C8')
      , ('\x04CB', '\x04CC')
      , ('\x04D0', '\x04EB')
      , ('\x04EE', '\x04F5')
      , ('\x04F8', '\x04F9')
      , ('\x0531', '\x0556')
      , ('\x0559', '\x0559')
      , ('\x0561', '\x0586')
      , ('\x05D0', '\x05EA')
      , ('\x05F0', '\x05F2')
      , ('\x0621', '\x063A')
      , ('\x0641', '\x064A')
      , ('\x0671', '\x06B7')
      , ('\x06BA', '\x06BE')
      , ('\x06C0', '\x06CE')
      , ('\x06D0', '\x06D3')
      , ('\x06D5', '\x06D5')
      , ('\x06E5', '\x06E6')
      , ('\x0905', '\x0939')
      , ('\x093D', '\x093D')
      , ('\x0958', '\x0961')
      , ('\x0985', '\x098C')
      , ('\x098F', '\x0990')
      , ('\x0993', '\x09A8')
      , ('\x09AA', '\x09B0')
      , ('\x09B2', '\x09B2')
      , ('\x09B6', '\x09B9')
      , ('\x09DC', '\x09DD')
      , ('\x09DF', '\x09E1')
      , ('\x09F0', '\x09F1')
      , ('\x0A05', '\x0A0A')
      , ('\x0A0F', '\x0A10')
      , ('\x0A13', '\x0A28')
      , ('\x0A2A', '\x0A30')
      , ('\x0A32', '\x0A33')
      , ('\x0A35', '\x0A36')
      , ('\x0A38', '\x0A39')
      , ('\x0A59', '\x0A5C')
      , ('\x0A5E', '\x0A5E')
      , ('\x0A72', '\x0A74')
      , ('\x0A85', '\x0A8B')
      , ('\x0A8D', '\x0A8D')
      , ('\x0A8F', '\x0A91')
      , ('\x0A93', '\x0AA8')
      , ('\x0AAA', '\x0AB0')
      , ('\x0AB2', '\x0AB3')
      , ('\x0AB5', '\x0AB9')
      , ('\x0ABD', '\x0ABD')
      , ('\x0AE0', '\x0AE0')
      , ('\x0B05', '\x0B0C')
      , ('\x0B0F', '\x0B10')
      , ('\x0B13', '\x0B28')
      , ('\x0B2A', '\x0B30')
      , ('\x0B32', '\x0B33')
      , ('\x0B36', '\x0B39')
      , ('\x0B3D', '\x0B3D')
      , ('\x0B5C', '\x0B5D')
      , ('\x0B5F', '\x0B61')
      , ('\x0B85', '\x0B8A')
      , ('\x0B8E', '\x0B90')
      , ('\x0B92', '\x0B95')
      , ('\x0B99', '\x0B9A')
      , ('\x0B9C', '\x0B9C')
      , ('\x0B9E', '\x0B9F')
      , ('\x0BA3', '\x0BA4')
      , ('\x0BA8', '\x0BAA')
      , ('\x0BAE', '\x0BB5')
      , ('\x0BB7', '\x0BB9')
      , ('\x0C05', '\x0C0C')
      , ('\x0C0E', '\x0C10')
      , ('\x0C12', '\x0C28')
      , ('\x0C2A', '\x0C33')
      , ('\x0C35', '\x0C39')
      , ('\x0C60', '\x0C61')
      , ('\x0C85', '\x0C8C')
      , ('\x0C8E', '\x0C90')
      , ('\x0C92', '\x0CA8')
      , ('\x0CAA', '\x0CB3')
      , ('\x0CB5', '\x0CB9')
      , ('\x0CDE', '\x0CDE')
      , ('\x0CE0', '\x0CE1')
      , ('\x0D05', '\x0D0C')
      , ('\x0D0E', '\x0D10')
      , ('\x0D12', '\x0D28')
      , ('\x0D2A', '\x0D39')
      , ('\x0D60', '\x0D61')
      , ('\x0E01', '\x0E2E')
      , ('\x0E30', '\x0E30')
      , ('\x0E32', '\x0E33')
      , ('\x0E40', '\x0E45')
      , ('\x0E81', '\x0E82')
      , ('\x0E84', '\x0E84')
      , ('\x0E87', '\x0E88')
      , ('\x0E8A', '\x0E8A')
      , ('\x0E8D', '\x0E8D')
      , ('\x0E94', '\x0E97')
      , ('\x0E99', '\x0E9F')
      , ('\x0EA1', '\x0EA3')
      , ('\x0EA5', '\x0EA5')
      , ('\x0EA7', '\x0EA7')
      , ('\x0EAA', '\x0EAB')
      , ('\x0EAD', '\x0EAE')
      , ('\x0EB0', '\x0EB0')
      , ('\x0EB2', '\x0EB3')
      , ('\x0EBD', '\x0EBD')
      , ('\x0EC0', '\x0EC4')
      , ('\x0F40', '\x0F47')
      , ('\x0F49', '\x0F69')
      , ('\x10A0', '\x10C5')
      , ('\x10D0', '\x10F6')
      , ('\x1100', '\x1100')
      , ('\x1102', '\x1103')
      , ('\x1105', '\x1107')
      , ('\x1109', '\x1109')
      , ('\x110B', '\x110C')
      , ('\x110E', '\x1112')
      , ('\x113C', '\x113C')
      , ('\x113E', '\x113E')
      , ('\x1140', '\x1140')
      , ('\x114C', '\x114C')
      , ('\x114E', '\x114E')
      , ('\x1150', '\x1150')
      , ('\x1154', '\x1155')
      , ('\x1159', '\x1159')
      , ('\x115F', '\x1161')
      , ('\x1163', '\x1163')
      , ('\x1165', '\x1165')
      , ('\x1167', '\x1167')
      , ('\x1169', '\x1169')
      , ('\x116D', '\x116E')
      , ('\x1172', '\x1173')
      , ('\x1175', '\x1175')
      , ('\x119E', '\x119E')
      , ('\x11A8', '\x11A8')
      , ('\x11AB', '\x11AB')
      , ('\x11AE', '\x11AF')
      , ('\x11B7', '\x11B8')
      , ('\x11BA', '\x11BA')
      , ('\x11BC', '\x11C2')
      , ('\x11EB', '\x11EB')
      , ('\x11F0', '\x11F0')
      , ('\x11F9', '\x11F9')
      , ('\x1E00', '\x1E9B')
      , ('\x1EA0', '\x1EF9')
      , ('\x1F00', '\x1F15')
      , ('\x1F18', '\x1F1D')
      , ('\x1F20', '\x1F45')
      , ('\x1F48', '\x1F4D')
      , ('\x1F50', '\x1F57')
      , ('\x1F59', '\x1F59')
      , ('\x1F5B', '\x1F5B')
      , ('\x1F5D', '\x1F5D')
      , ('\x1F5F', '\x1F7D')
      , ('\x1F80', '\x1FB4')
      , ('\x1FB6', '\x1FBC')
      , ('\x1FBE', '\x1FBE')
      , ('\x1FC2', '\x1FC4')
      , ('\x1FC6', '\x1FCC')
      , ('\x1FD0', '\x1FD3')
      , ('\x1FD6', '\x1FDB')
      , ('\x1FE0', '\x1FEC')
      , ('\x1FF2', '\x1FF4')
      , ('\x1FF6', '\x1FFC')
      , ('\x2126', '\x2126')
      , ('\x212A', '\x212B')
      , ('\x212E', '\x212E')
      , ('\x2180', '\x2182')
      , ('\x3041', '\x3094')
      , ('\x30A1', '\x30FA')
      , ('\x3105', '\x312C')
      , ('\xAC00', '\xD7A3')
      ]

-- |
-- Test whether a character is an XML ideographic character,
-- i.e. a member of 'charPropXmlIdeographicChar'.

isXmlIdeographicChar    :: Char -> Bool
isXmlIdeographicChar c  = c `elemCS` charPropXmlIdeographicChar
{-# INLINE isXmlIdeographicChar #-}

-- |
-- The XML ideographic characters, written as ascending
-- (lower, upper) ranges.

charPropXmlIdeographicChar      :: CharSet
charPropXmlIdeographicChar
    = [ ('\x3007', '\x3007')
      , ('\x3021', '\x3029')
      , ('\x4E00', '\x9FA5')
      ]

-- |
-- Test whether a character is an XML combining character,
-- i.e. a member of 'charPropXmlCombiningChar'.

isXmlCombiningChar      :: Char -> Bool
isXmlCombiningChar c    = c `elemCS` charPropXmlCombiningChar

-- |
-- The XML combining characters, written as ascending (lower, upper)
-- ranges. A pair with equal bounds denotes a single character.

charPropXmlCombiningChar        :: CharSet
charPropXmlCombiningChar
    = [ ('\x0300', '\x0345')
      , ('\x0360', '\x0361')
      , ('\x0483', '\x0486')
      , ('\x0591', '\x05A1')
      , ('\x05A3', '\x05B9')
      , ('\x05BB', '\x05BD')
      , ('\x05BF', '\x05BF')
      , ('\x05C1', '\x05C2')
      , ('\x05C4', '\x05C4')
      , ('\x064B', '\x0652')
      , ('\x0670', '\x0670')
      , ('\x06D6', '\x06DC')
      , ('\x06DD', '\x06DF')
      , ('\x06E0', '\x06E4')
      , ('\x06E7', '\x06E8')
      , ('\x06EA', '\x06ED')
      , ('\x0901', '\x0903')
      , ('\x093C', '\x093C')
      , ('\x093E', '\x094C')
      , ('\x094D', '\x094D')
      , ('\x0951', '\x0954')
      , ('\x0962', '\x0963')
      , ('\x0981', '\x0983')
      , ('\x09BC', '\x09BC')
      , ('\x09BE', '\x09BE')
      , ('\x09BF', '\x09BF')
      , ('\x09C0', '\x09C4')
      , ('\x09C7', '\x09C8')
      , ('\x09CB', '\x09CD')
      , ('\x09D7', '\x09D7')
      , ('\x09E2', '\x09E3')
      , ('\x0A02', '\x0A02')
      , ('\x0A3C', '\x0A3C')
      , ('\x0A3E', '\x0A3E')
      , ('\x0A3F', '\x0A3F')
      , ('\x0A40', '\x0A42')
      , ('\x0A47', '\x0A48')
      , ('\x0A4B', '\x0A4D')
      , ('\x0A70', '\x0A71')
      , ('\x0A81', '\x0A83')
      , ('\x0ABC', '\x0ABC')
      , ('\x0ABE', '\x0AC5')
      , ('\x0AC7', '\x0AC9')
      , ('\x0ACB', '\x0ACD')
      , ('\x0B01', '\x0B03')
      , ('\x0B3C', '\x0B3C')
      , ('\x0B3E', '\x0B43')
      , ('\x0B47', '\x0B48')
      , ('\x0B4B', '\x0B4D')
      , ('\x0B56', '\x0B57')
      , ('\x0B82', '\x0B83')
      , ('\x0BBE', '\x0BC2')
      , ('\x0BC6', '\x0BC8')
      , ('\x0BCA', '\x0BCD')
      , ('\x0BD7', '\x0BD7')
      , ('\x0C01', '\x0C03')
      , ('\x0C3E', '\x0C44')
      , ('\x0C46', '\x0C48')
      , ('\x0C4A', '\x0C4D')
      , ('\x0C55', '\x0C56')
      , ('\x0C82', '\x0C83')
      , ('\x0CBE', '\x0CC4')
      , ('\x0CC6', '\x0CC8')
      , ('\x0CCA', '\x0CCD')
      , ('\x0CD5', '\x0CD6')
      , ('\x0D02', '\x0D03')
      , ('\x0D3E', '\x0D43')
      , ('\x0D46', '\x0D48')
      , ('\x0D4A', '\x0D4D')
      , ('\x0D57', '\x0D57')
      , ('\x0E31', '\x0E31')
      , ('\x0E34', '\x0E3A')
      , ('\x0E47', '\x0E4E')
      , ('\x0EB1', '\x0EB1')
      , ('\x0EB4', '\x0EB9')
      , ('\x0EBB', '\x0EBC')
      , ('\x0EC8', '\x0ECD')
      , ('\x0F18', '\x0F19')
      , ('\x0F35', '\x0F35')
      , ('\x0F37', '\x0F37')
      , ('\x0F39', '\x0F39')
      , ('\x0F3E', '\x0F3E')
      , ('\x0F3F', '\x0F3F')
      , ('\x0F71', '\x0F84')
      , ('\x0F86', '\x0F8B')
      , ('\x0F90', '\x0F95')
      , ('\x0F97', '\x0F97')
      , ('\x0F99', '\x0FAD')
      , ('\x0FB1', '\x0FB7')
      , ('\x0FB9', '\x0FB9')
      , ('\x20D0', '\x20DC')
      , ('\x20E1', '\x20E1')
      , ('\x302A', '\x302F')
      , ('\x3099', '\x3099')
      , ('\x309A', '\x309A')
      ]

-- |
-- Test whether a character is an XML digit,
-- i.e. a member of 'charPropXmlDigit'.

isXmlDigit              :: Char -> Bool
isXmlDigit c            = c `elemCS` charPropXmlDigit

-- |
-- The XML digits, written as ascending (lower, upper) ranges.

charPropXmlDigit        :: CharSet
charPropXmlDigit
    = [ ('\x0030', '\x0039')
      , ('\x0660', '\x0669')
      , ('\x06F0', '\x06F9')
      , ('\x0966', '\x096F')
      , ('\x09E6', '\x09EF')
      , ('\x0A66', '\x0A6F')
      , ('\x0AE6', '\x0AEF')
      , ('\x0B66', '\x0B6F')
      , ('\x0BE7', '\x0BEF')
      , ('\x0C66', '\x0C6F')
      , ('\x0CE6', '\x0CEF')
      , ('\x0D66', '\x0D6F')
      , ('\x0E50', '\x0E59')
      , ('\x0ED0', '\x0ED9')
      , ('\x0F20', '\x0F29')
      ]

-- |
-- Test whether a character is an XML extender,
-- i.e. a member of 'charPropXmlExtender'.

isXmlExtender           :: Char -> Bool
isXmlExtender c         = c `elemCS` charPropXmlExtender

-- |
-- The XML extenders, written as ascending (lower, upper) ranges.
-- A pair with equal bounds denotes a single character.

charPropXmlExtender     :: CharSet
charPropXmlExtender
    = [ ('\x00B7', '\x00B7')
      , ('\x02D0', '\x02D0')
      , ('\x02D1', '\x02D1')
      , ('\x0387', '\x0387')
      , ('\x0640', '\x0640')
      , ('\x0E46', '\x0E46')
      , ('\x0EC6', '\x0EC6')
      , ('\x3005', '\x3005')
      , ('\x3031', '\x3035')
      , ('\x309D', '\x309E')
      , ('\x30FC', '\x30FE')
      ]

-- |
-- Test for an XML control character or a permanently undefined
-- (discouraged) character, i.e. a member of
-- 'charPropXmlControlOrPermanentlyUndefined'.
--
-- see Errata to XML1.0 (http:\/\/www.w3.org\/XML\/xml-V10-2e-errata) No 46
--
-- Document authors are encouraged to avoid \"compatibility characters\",
-- as defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of [Unicode3]).
-- The characters defined in the ranges of
-- 'charPropXmlControlOrPermanentlyUndefined' are also discouraged.
-- They are either control characters or permanently undefined Unicode characters.

isXmlControlOrPermanentlyUndefined      :: Char -> Bool
isXmlControlOrPermanentlyUndefined c
    = c `elemCS` charPropXmlControlOrPermanentlyUndefined

-- |
-- The discouraged XML characters: control characters and permanently
-- undefined Unicode characters, written as ascending (lower, upper)
-- ranges.
--
-- The third range covers the whole noncharacter block U+FDD0..U+FDEF,
-- as listed in XML 1.0 (Fifth Edition), section 2.2. Earlier errata
-- text ended this range at U+FDDF, which left out U+FDE0..U+FDEF.

charPropXmlControlOrPermanentlyUndefined        :: CharSet
charPropXmlControlOrPermanentlyUndefined
    = [ ('\x7F', '\x84')
      , ('\x86', '\x9F')
      , ('\xFDD0', '\xFDEF')
      , ('\x1FFFE', '\x1FFFF')
      , ('\x2FFFE', '\x2FFFF')
      , ('\x3FFFE', '\x3FFFF')
      , ('\x4FFFE', '\x4FFFF')
      , ('\x5FFFE', '\x5FFFF')
      , ('\x6FFFE', '\x6FFFF')
      , ('\x7FFFE', '\x7FFFF')
      , ('\x8FFFE', '\x8FFFF')
      , ('\x9FFFE', '\x9FFFF')
      , ('\xAFFFE', '\xAFFFF')
      , ('\xBFFFE', '\xBFFFF')
      , ('\xCFFFE', '\xCFFFF')
      , ('\xDFFFE', '\xDFFFF')
      , ('\xEFFFE', '\xEFFFF')
      , ('\xFFFFE', '\xFFFFF')
      , ('\x10FFFE', '\x10FFFF')
      ]

-- ------------------------------------------------------------