-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Parser.XmlCharParser
   Copyright  : Copyright (C) 2010 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   UTF-8 character parser and simple XML token parsers
-}

-- ------------------------------------------------------------

module Text.XML.HXT.Parser.XmlCharParser
    ( XParser
    , SimpleXParser
    , XPState(..)
    , withNormNewline
    , withoutNormNewline

    , xmlChar                   -- xml char parsers
    , xmlNameChar
    , xmlNameStartChar
    , xmlNCNameChar
    , xmlNCNameStartChar
    , xmlLetter
    , xmlSpaceChar
    , xmlCRLFChar
    )
where

import           Data.Char.Properties.XMLCharProps (isXmlCharCR, isXmlLetter,
                                                    isXmlNCNameChar,
                                                    isXmlNCNameStartChar,
                                                    isXmlNameChar,
                                                    isXmlNameStartChar,
                                                    isXmlSpaceCharCR)

import           Data.String.Unicode

import           Text.ParserCombinators.Parsec

-- ------------------------------------------------------------

type XParser s a        = GenParser Char (XPState s) a
type SimpleXParser a    = XParser () a

data XPState s          = XPState
    { forall s. XPState s -> Bool
xps_normalizeNewline :: !Bool
    , forall s. XPState s -> s
xps_userState        ::  s
    }

withNormNewline         :: a -> XPState a
withNormNewline :: forall a. a -> XPState a
withNormNewline a
x       = Bool -> a -> XPState a
forall s. Bool -> s -> XPState s
XPState Bool
True a
x

withoutNormNewline      :: a -> XPState a
withoutNormNewline :: forall a. a -> XPState a
withoutNormNewline a
x    = Bool -> a -> XPState a
forall s. Bool -> s -> XPState s
XPState Bool
False a
x

-- ------------------------------------------------------------
--
-- Char (2.2)
--

-- |
-- parse a single Unicode character

xmlChar                 :: XParser s Unicode
xmlChar :: forall s. XParser s Unicode
xmlChar                 = ( (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlCharCR
                            ParsecT [Unicode] (XPState s) Identity Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
                            ParsecT [Unicode] (XPState s) Identity Unicode
forall s. XParser s Unicode
xmlCRLFChar
                          )
                          ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML character"
{-# INLINE xmlChar #-}

-- |
-- parse a XML name character

xmlNameChar             :: XParser s Unicode
xmlNameChar :: forall s. XParser s Unicode
xmlNameChar             = (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlNameChar ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML name character"
{-# INLINE xmlNameChar #-}

-- |
-- parse a XML name start character

xmlNameStartChar        :: XParser s Unicode
xmlNameStartChar :: forall s. XParser s Unicode
xmlNameStartChar        = (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlNameStartChar ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML name start character"
{-# INLINE xmlNameStartChar #-}

-- |
-- parse a XML NCName character

xmlNCNameChar           :: XParser s Unicode
xmlNCNameChar :: forall s. XParser s Unicode
xmlNCNameChar           = (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlNCNameChar ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML NCName character"
{-# INLINE xmlNCNameChar #-}

-- |
-- parse a XML NCName start character

xmlNCNameStartChar      :: XParser s Unicode
xmlNCNameStartChar :: forall s. XParser s Unicode
xmlNCNameStartChar      = (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlNCNameStartChar ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML NCName start character"
{-# INLINE xmlNCNameStartChar #-}

-- |
-- parse a XML letter character

xmlLetter               :: XParser s Unicode
xmlLetter :: forall s. XParser s Unicode
xmlLetter               = (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlLetter ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"legal XML letter"
{-# INLINE xmlLetter #-}

-- |
-- White Space (2.3)
--
-- end of line handling (2.11) will be done before or with 'xmlCRLFChar' parser

xmlSpaceChar            :: XParser s Char
xmlSpaceChar :: forall s. XParser s Unicode
xmlSpaceChar            = ( (Unicode -> Bool) -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
(Unicode -> Bool) -> ParsecT s u m Unicode
satisfy Unicode -> Bool
isXmlSpaceCharCR
                            ParsecT [Unicode] (XPState s) Identity Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
                            ParsecT [Unicode] (XPState s) Identity Unicode
forall s. XParser s Unicode
xmlCRLFChar
                          )
                          ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"white space"
{-# INLINE xmlSpaceChar #-}

-- |
-- White Space Normalization
--
-- end of line handling (2.11)
-- \#x0D and \#x0D\#x0A are mapped to \#x0A

xmlCRLFChar            :: XParser s Char
xmlCRLFChar :: forall s. XParser s Unicode
xmlCRLFChar            = ( do
                           Unicode
_ <- Unicode -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
Unicode -> ParsecT s u m Unicode
char Unicode
'\r'
                           XPState s
s <- ParsecT [Unicode] (XPState s) Identity (XPState s)
forall (m :: * -> *) s u. Monad m => ParsecT s u m u
getState
                           if XPState s -> Bool
forall s. XPState s -> Bool
xps_normalizeNewline XPState s
s
                              then Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
-> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option Unicode
'\n' (Unicode -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s (m :: * -> *) u.
Stream s m Unicode =>
Unicode -> ParsecT s u m Unicode
char Unicode
'\n')
                              else Unicode -> ParsecT [Unicode] (XPState s) Identity Unicode
forall a. a -> ParsecT [Unicode] (XPState s) Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return Unicode
'\r'
                         )
                         ParsecT [Unicode] (XPState s) Identity Unicode
-> [Unicode] -> ParsecT [Unicode] (XPState s) Identity Unicode
forall s u (m :: * -> *) a.
ParsecT s u m a -> [Unicode] -> ParsecT s u m a
<?> [Unicode]
"newline"

-- ------------------------------------------------------------