-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.ReadDocument
   Copyright  : Copyright (C) 2005-2013 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Compound arrows for reading an XML\/HTML document or an XML\/HTML string

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.ReadDocument
    ( readDocument
    , readFromDocument
    , readString
    , readFromString
    , hread
    , hreadDoc
    , xread
    , xreadDoc
    )
where

import Control.Arrow.ListArrows

import Data.Maybe                               ( fromMaybe )
import qualified Data.Map                       as M


import Text.XML.HXT.DOM.Interface

import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.Edit                  ( canonicalizeAllNodes
                                                , canonicalizeForXPath
                                                , canonicalizeContents
                                                , rememberDTDAttrl
                                                , removeDocWhiteSpace
                                                )
import qualified Text.XML.HXT.Arrow.ParserInterface as PI
import Text.XML.HXT.Arrow.ProcessDocument       ( getDocumentContents
                                                , parseXmlDocument
                                                , parseXmlDocumentWithExpat
                                                , parseHtmlDocument
                                                , propagateAndValidateNamespaces
                                                , andValidateNamespaces
                                                )
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs

-- ------------------------------------------------------------
--
{- |
the main document input filter

this filter can be configured by a list of configuration options,
a value of type 'Text.XML.HXT.XmlState.TypeDefs.SysConfig'

for all available options see module 'Text.XML.HXT.Arrow.XmlState.SystemConfig'

- @withValidate yes\/no@ :
  switch on\/off DTD validation. Only for XML parsed documents, not for HTML parsing.

- @withSubstDTDEntities yes\/no@ :
  switch on\/off entity substitution for general entities defined in DTD validation.
  Default is @yes@.
  Switching this option and the validation off can lead to faster parsing, in that case
  reading the DTD documents is not longer necessary.
  Only used with XML parsed documents, not with HTML parsing.

- @withSubstHTMLEntities yes\/no@ :
  switch on\/off entity substitution for general entities defined in HTML validation.
  Default is @no@.
  Switching this option on and the validation and substDTDEntities off can lead to faster parsing,
  in that case
  reading the DTD documents is not longer necessary, HTML general entities are still substituted.
  Only used with XML parsed documents, not with HTML parsing.

- @withParseHTML yes\/no@ :
  switch on HTML parsing.

- @withParseByMimeType yes\/no@ :
  select XML\/HTML parser by document mime type.
  text\/xml and text\/xhtml are parsed as XML, text\/html as HTML.

- @withCheckNamespaces yes\/no@ :
  Switch on\/off namespace propagation and checking

- @withInputEncoding \<encoding-spec\>@ :
  Set default encoding.

- @withTagSoup@ :
  use light weight and lazy parser based on tagsoup lib.
  This is only available when package hxt-tagsoup is installed and
  the source contains an @import Text.XML.HXT.TagSoup@.

- @withRelaxNG \<schema.rng\>@ :
  validate document with Relax NG, the parameter is for the schema URI.
  This implies using XML parser, no validation against DTD, and canonicalisation.

- @withCurl [\<curl-option\>...]@ :
  Use the libCurl binding for HTTP access.
  This is only available when package hxt-curl is installed and
  the source contains an @import Text.XML.HXT.Curl@.
                   
- @withHTTP [\<http-option\>...]@ :
  Use the Haskell HTTP package for HTTP access.
  This is only available when package hxt-http is installed and
  the source contains an @import Text.XML.HXT.HTTP@.

examples:

> readDocument [] "test.xml"

reads and validates a document \"test.xml\", no namespace propagation, only canonicalization is performed

> ...
> import Text.XML.HXT.Curl
> ...
>
> readDocument [ withValidate        no
>              , withInputEncoding   isoLatin1
>              , withParseByMimeType yes
>              , withCurl []
>              ] "http://localhost/test.php"

reads document \"test.php\", parses it as HTML or XML depending on the mimetype given from the server, but without validation, default encoding 'isoLatin1'.
HTTP access is done via libCurl.

> readDocument [ withParseHTML       yes
>              , withInputEncoding   isoLatin1
>              ] ""

reads a HTML document from standard input, no validation is done when parsing HTML, default encoding is 'isoLatin1',

> readDocument [ withInputEncoding  isoLatin1
>              , withValidate       no
>              , withMimeTypeFile   "/etc/mime.types"
>              , withStrictInput    yes
>              ] "test.svg"

reads an SVG document from \"test.svg\", sets the mime type by looking in the system mimetype config file,
default encoding is 'isoLatin1',

> ...
> import Text.XML.HXT.Curl
> import Text.XML.HXT.TagSoup
> ...
>
> readDocument [ withParseHTML      yes
>              , withTagSoup
>              , withProxy          "www-cache:3128"
>              , withCurl           []
>              , withWarnings       no
>              ] "http://www.haskell.org/"

reads Haskell homepage with HTML parser, ignoring any warnings
(at the time of writing, there were some HTML errors),
with http access via libCurl interface
and proxy \"www-cache\" at port 3128,
parsing is done with tagsoup HTML parser.
This requires packages \"hxt-curl\" and \"hxt-tagsoup\" to be installed

> readDocument [ withValidate          yes
>              , withCheckNamespaces   yes
>              , withRemoveWS          yes
>              , withTrace             2
>              , withHTTP              []
>              ] "http://www.w3c.org/"

read w3c home page (xhtml), validate and check namespaces, remove whitespace between tags,
trace activities with level 2.
HTTP access is done with Haskell HTTP package

> readDocument [ withValidate          no
>              , withSubstDTDEntities  no
>              ...
>              ] "http://www.w3c.org/"

read w3c home page (xhtml), but without accessing the DTD given in that document.
Only the predefined XML general entity refs are substituted.

> readDocument [ withValidate          no
>              , withSubstDTDEntities  no
>              , withSubstHTMLEntities yes
>              ...
>              ] "http://www.w3c.org/"

same as above, but with substituion of all general entity refs defined in XHTML.

for minimal complete examples see 'Text.XML.HXT.Arrow.WriteDocument.writeDocument'
and 'runX', the main starting point for running an XML arrow.
-}

readDocument    :: SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument :: forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument SysConfigList
config String
src
    = IOStateArrow s b XmlTree -> IOStateArrow s b XmlTree
forall s a b. IOStateArrow s a b -> IOStateArrow s a b
localSysEnv
      (IOStateArrow s b XmlTree -> IOStateArrow s b XmlTree)
-> IOStateArrow s b XmlTree -> IOStateArrow s b XmlTree
forall a b. (a -> b) -> a -> b
$
      SysConfigList -> String -> IOStateArrow s b XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument' SysConfigList
config String
src

readDocument'   :: SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument' :: forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument' SysConfigList
config String
src
    = SysConfigList -> IOStateArrow s b b
forall s c. SysConfigList -> IOStateArrow s c c
configSysVars SysConfigList
config
      IOStateArrow s b b
-> IOSLA (XIOState s) b XmlTree -> IOSLA (XIOState s) b XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      Bool -> IOSLA (XIOState s) b XmlTree
forall {s0} {a}. Bool -> IOSLA (XIOState s0) a XmlTree
readD (Bool -> IOSLA (XIOState s) b XmlTree)
-> IOSLA (XIOState s) b Bool -> IOSLA (XIOState s) b XmlTree
forall c b d.
(c -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState Bool -> IOSLA (XIOState s) b Bool
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState Bool
theWithCache
    where
    readD :: Bool -> IOSLA (XIOState s0) a XmlTree
readD Bool
True
        = XmlTree -> IOSLA (XIOState s0) a XmlTree
forall c b. c -> IOSLA (XIOState s0) b c
forall (a :: * -> * -> *) c b. ArrowList a => c -> a b c
constA XmlTree
forall a. HasCallStack => a
undefined              -- just for generalizing the signature to: IOStateArrow s b       XmlTree
          IOSLA (XIOState s0) a XmlTree
-> IOSLA (XIOState s0) XmlTree XmlTree
-> IOSLA (XIOState s0) a XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>                           -- instead of                              IOStateArrow s XmlTree XmlTree
          (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s0) b d)
-> IOSLA (XIOState s0) b c -> IOSLA (XIOState s0) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< (Selector XIOSysState (String -> IOSArrow XmlTree XmlTree)
-> IOStateArrow s0 XmlTree (String -> IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (String -> IOSArrow XmlTree XmlTree)
theCacheRead IOStateArrow s0 XmlTree (String -> IOSArrow XmlTree XmlTree)
-> ((String -> IOSArrow XmlTree XmlTree)
    -> IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> (c -> d) -> a b d
>>^ ((String -> IOSArrow XmlTree XmlTree)
-> String -> IOSArrow XmlTree XmlTree
forall a b. (a -> b) -> a -> b
$ String
src)))
    readD Bool
False
        = String -> IOSLA (XIOState s0) a XmlTree
forall s b. String -> IOStateArrow s b XmlTree
readDocument'' String
src

readDocument''   :: String -> IOStateArrow s b XmlTree
readDocument'' :: forall s b. String -> IOStateArrow s b XmlTree
readDocument'' String
src
    = String -> IOStateArrow s b XmlTree
forall s b. String -> IOStateArrow s b XmlTree
getDocumentContents String
src
      IOStateArrow s b XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree -> IOStateArrow s b XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      ( String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
forall {s}.
String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
processDoc
        (String
 -> (Bool, (Bool, ([String], (Bool, Bool))))
 -> IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA
     (XIOState s)
     XmlTree
     (String, (Bool, (Bool, ([String], (Bool, Bool)))))
-> IOSLA (XIOState s) XmlTree XmlTree
forall c1 c2 b d.
(c1 -> c2 -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b (c1, c2) -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c1 c2 b d.
ArrowList a =>
(c1 -> c2 -> a b d) -> a b (c1, c2) -> a b d
$<<
        ( IOSLA (XIOState s) XmlTree String
getMimeType
          IOSLA (XIOState s) XmlTree String
-> IOSLA
     (XIOState s) XmlTree (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA
     (XIOState s)
     XmlTree
     (String, (Bool, (Bool, ([String], (Bool, Bool)))))
forall b c c'.
IOSLA (XIOState s) b c
-> IOSLA (XIOState s) b c' -> IOSLA (XIOState s) b (c, c')
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&&
          Selector XIOSysState (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA
     (XIOState s) XmlTree (Bool, (Bool, ([String], (Bool, Bool))))
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theParseByMimeType   Selector XIOSysState Bool
-> Selector XIOSysState (Bool, ([String], (Bool, Bool)))
-> Selector XIOSysState (Bool, (Bool, ([String], (Bool, Bool))))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                     Selector XIOSysState Bool
theParseHTML         Selector XIOSysState Bool
-> Selector XIOSysState ([String], (Bool, Bool))
-> Selector XIOSysState (Bool, ([String], (Bool, Bool)))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                     Selector XIOSysState [String]
theAcceptedMimeTypes Selector XIOSysState [String]
-> Selector XIOSysState (Bool, Bool)
-> Selector XIOSysState ([String], (Bool, Bool))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                     Selector XIOSysState Bool
theRelaxValidate     Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                     Selector XIOSysState Bool
theXmlSchemaValidate
                    )
        )
      )
      IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      Int -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 (String
"readDocument: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
src String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" processed")
      IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceSource
      IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
traceTree
    where
    processNoneEmptyDoc :: a XmlTree XmlTree -> a XmlTree XmlTree
processNoneEmptyDoc a XmlTree XmlTree
p
        = a XmlTree XmlTree
-> a XmlTree XmlTree -> a XmlTree XmlTree -> a XmlTree XmlTree
forall b c d. a b c -> a b d -> a b d -> a b d
forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d -> a b d
ifA (LA XmlTree XmlTree -> a XmlTree XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA LA XmlTree XmlTree
hasEmptyBody)
              (a XmlTree XmlTree -> a XmlTree XmlTree
forall (t :: * -> *) b. Tree t => a (t b) (t b) -> a (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren a XmlTree XmlTree
forall b c. a b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none)
              a XmlTree XmlTree
p
        where
          hasEmptyBody :: LA XmlTree XmlTree
hasEmptyBody
              = String -> (String -> Bool) -> LA XmlTree XmlTree
forall (a :: * -> * -> *).
ArrowXml a =>
String -> (String -> Bool) -> a XmlTree XmlTree
hasAttrValue String
transferStatus (String -> String -> Bool
forall a. Eq a => a -> a -> Bool
/= String
"200")        -- test on empty response body for not o.k. responses
                LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall b c d. LA b c -> LA b d -> LA b d
forall (a :: * -> * -> *) b c d.
ArrowIf a =>
a b c -> a b d -> a b d
`guards`                                      -- e.g. 3xx status values
                ( LA XmlTree XmlTree -> LA XmlTree XmlTree
forall b c. LA b c -> LA b b
forall (a :: * -> * -> *) b c. ArrowIf a => a b c -> a b b
neg LA XmlTree XmlTree
forall (t :: * -> *) b. Tree t => LA (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
                  LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall b c. LA b c -> LA b c -> LA b c
forall (a :: * -> * -> *) b c.
ArrowPlus a =>
a b c -> a b c -> a b c
<+>
                  ( LA XmlTree XmlTree
forall (t :: * -> *) b. Tree t => LA (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isWhiteSpace )
                )

    getMimeType :: IOSLA (XIOState s) XmlTree String
getMimeType
        = String -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *). ArrowXml a => String -> a XmlTree String
getAttrValue String
transferMimeType IOSLA (XIOState s) XmlTree String
-> (String -> String) -> IOSLA (XIOState s) XmlTree String
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> (c -> d) -> a b d
>>^ String -> String
stringToLower

    applyMimeTypeHandler :: String -> IOStateArrow s0 XmlTree XmlTree
applyMimeTypeHandler String
mt
        = IOSArrow XmlTree XmlTree -> IOStateArrow s0 XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (Map String (IOSArrow XmlTree XmlTree) -> IOSArrow XmlTree XmlTree
forall {s}.
Map String (IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree XmlTree
applyMTH (Map String (IOSArrow XmlTree XmlTree) -> IOSArrow XmlTree XmlTree)
-> IOSLA
     (XIOState ()) XmlTree (Map String (IOSArrow XmlTree XmlTree))
-> IOSArrow XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState ()) b d)
-> IOSLA (XIOState ()) b c -> IOSLA (XIOState ()) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Map String (IOSArrow XmlTree XmlTree))
-> IOSLA
     (XIOState ()) XmlTree (Map String (IOSArrow XmlTree XmlTree))
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (Map String (IOSArrow XmlTree XmlTree))
theMimeTypeHandlers)
        where
          applyMTH :: Map String (IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree XmlTree
applyMTH Map String (IOSLA (XIOState s) XmlTree XmlTree)
mtTable
              = IOSLA (XIOState s) XmlTree XmlTree
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree XmlTree
forall a. a -> Maybe a -> a
fromMaybe IOSLA (XIOState s) XmlTree XmlTree
forall b c. IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none (Maybe (IOSLA (XIOState s) XmlTree XmlTree)
 -> IOSLA (XIOState s) XmlTree XmlTree)
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree XmlTree
forall a b. (a -> b) -> a -> b
$
                (IOSLA (XIOState s) XmlTree XmlTree
 -> IOSLA (XIOState s) XmlTree XmlTree)
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\ IOSLA (XIOState s) XmlTree XmlTree
f -> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {a :: * -> * -> *}.
ArrowTree a =>
a XmlTree XmlTree -> a XmlTree XmlTree
processNoneEmptyDoc
                             (IOSLA (XIOState s) XmlTree XmlTree
forall {s} {b}. IOStateArrow s b b
traceMimeStart IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOSLA (XIOState s) XmlTree XmlTree
f IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> IOSLA (XIOState s) XmlTree XmlTree
forall {s} {b}. IOStateArrow s b b
traceMimeEnd)
                     ) (Maybe (IOSLA (XIOState s) XmlTree XmlTree)
 -> Maybe (IOSLA (XIOState s) XmlTree XmlTree))
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
forall a b. (a -> b) -> a -> b
$
                String
-> Map String (IOSLA (XIOState s) XmlTree XmlTree)
-> Maybe (IOSLA (XIOState s) XmlTree XmlTree)
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup String
mt Map String (IOSLA (XIOState s) XmlTree XmlTree)
mtTable
          traceMimeStart :: IOStateArrow s b b
traceMimeStart
              = Int -> String -> IOStateArrow s b b
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 (String -> IOStateArrow s b b) -> String -> IOStateArrow s b b
forall a b. (a -> b) -> a -> b
$
                String
"readDocument: calling user defined document parser"
          traceMimeEnd :: IOStateArrow s b b
traceMimeEnd
              = Int -> String -> IOStateArrow s b b
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
2 (String -> IOStateArrow s b b) -> String -> IOStateArrow s b b
forall a b. (a -> b) -> a -> b
$
                String
"readDocument: user defined document parser finished"

    processDoc :: String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
processDoc String
mimeType (Bool, (Bool, ([String], (Bool, Bool))))
options
        = Int -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 ([String] -> String
unwords [ String
"readDocument:", String -> String
forall a. Show a => a -> String
show String
src
                              , String
"(mime type:", String -> String
forall a. Show a => a -> String
show String
mimeType, String
") will be processed"
                              ]
                     )
          IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
          ( String -> IOSLA (XIOState s) XmlTree XmlTree
forall {s0}. String -> IOStateArrow s0 XmlTree XmlTree
applyMimeTypeHandler String
mimeType       -- try user defined document handlers
            IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall b c.
IOSLA (XIOState s) b c
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowIf a => a b c -> a b c -> a b c
`orElse`
            String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
forall {s}.
String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
processDoc' String
mimeType (Bool, (Bool, ([String], (Bool, Bool))))
options
          )

    processDoc' :: String
-> (Bool, (Bool, ([String], (Bool, Bool))))
-> IOSLA (XIOState s) XmlTree XmlTree
processDoc' String
mimeType ( Bool
parseByMimeType
                         , ( Bool
parseHtml
                           , ( [String]
acceptedMimeTypes
                             , ( Bool
validateWithRelax
                               , Bool
validateWithXmlSchema
                               ))))
        = ( if [String] -> String -> Bool
isAcceptedMimeType [String]
acceptedMimeTypes String
mimeType
            then ( IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {a :: * -> * -> *}.
ArrowTree a =>
a XmlTree XmlTree -> a XmlTree XmlTree
processNoneEmptyDoc
                   ( ( (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
-> IOSLA (XIOState s) XmlTree XmlTree
forall {s}.
(Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
-> IOSLA (XIOState s) XmlTree XmlTree
parse ((Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
 -> IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA
     (XIOState s) XmlTree (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
-> IOSLA (XIOState s) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
-> IOSLA
     (XIOState s) XmlTree (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theValidate              Selector XIOSysState Bool
-> Selector XIOSysState (Bool, (Bool, (Bool, (Bool, Bool))))
-> Selector
     XIOSysState (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                           Selector XIOSysState Bool
theSubstDTDEntities      Selector XIOSysState Bool
-> Selector XIOSysState (Bool, (Bool, (Bool, Bool)))
-> Selector XIOSysState (Bool, (Bool, (Bool, (Bool, Bool))))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                           Selector XIOSysState Bool
theSubstHTMLEntities     Selector XIOSysState Bool
-> Selector XIOSysState (Bool, (Bool, Bool))
-> Selector XIOSysState (Bool, (Bool, (Bool, Bool)))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                           Selector XIOSysState Bool
theIgnoreNoneXmlContents Selector XIOSysState Bool
-> Selector XIOSysState (Bool, Bool)
-> Selector XIOSysState (Bool, (Bool, Bool))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                           Selector XIOSysState Bool
theTagSoup               Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                           Selector XIOSysState Bool
theExpat
                                          )
                     )
                     IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                     ( if Bool
isXmlOrHtml
                       then ( ( (Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree
forall {s}. (Bool, Bool) -> IOStateArrow s XmlTree XmlTree
checknamespaces ((Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
-> IOSLA (XIOState s) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, Bool)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theCheckNamespaces Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                                              Selector XIOSysState Bool
theTagSoup
                                                             )
                              )
                              IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                              IOSLA (XIOState s) XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
rememberDTDAttrl
                              IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                              ( (Bool, (Bool, Bool)) -> IOSLA (XIOState s) XmlTree XmlTree
forall {a :: * -> * -> *}.
ArrowList a =>
(Bool, (Bool, Bool)) -> a XmlTree XmlTree
canonicalize ((Bool, (Bool, Bool)) -> IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (Bool, (Bool, Bool))
-> IOSLA (XIOState s) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, (Bool, Bool))
-> IOSLA (XIOState s) XmlTree (Bool, (Bool, Bool))
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
thePreserveComment Selector XIOSysState Bool
-> Selector XIOSysState (Bool, Bool)
-> Selector XIOSysState (Bool, (Bool, Bool))
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                                           Selector XIOSysState Bool
theCanonicalize    Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                                           Selector XIOSysState Bool
theTagSoup
                                                          )
                              )
                              IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                              ( (Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree
forall {a :: * -> * -> *}.
ArrowXml a =>
(Bool, Bool) -> a XmlTree XmlTree
whitespace ((Bool, Bool) -> IOSLA (XIOState s) XmlTree XmlTree)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
-> IOSLA (XIOState s) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s) b d)
-> IOSLA (XIOState s) b c -> IOSLA (XIOState s) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (Bool, Bool)
-> IOSLA (XIOState s) XmlTree (Bool, Bool)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar (Selector XIOSysState Bool
theRemoveWS Selector XIOSysState Bool
-> Selector XIOSysState Bool -> Selector XIOSysState (Bool, Bool)
forall s a b. Selector s a -> Selector s b -> Selector s (a, b)
.&&&.
                                                         Selector XIOSysState Bool
theTagSoup
                                                        )
                              )
                              IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                              IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
relaxOrXmlSchema
                            )
                       else IOSLA (XIOState s) XmlTree XmlTree
forall b. IOSLA (XIOState s) b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this
                     )
                   )
                 )
            else ( Int -> String -> IOSLA (XIOState s) XmlTree XmlTree
forall s b. Int -> String -> IOStateArrow s b b
traceMsg Int
1 ([String] -> String
unwords [ String
"readDocument:", String -> String
forall a. Show a => a -> String
show String
src
                                       , String
"mime type:", String -> String
forall a. Show a => a -> String
show String
mimeType, String
"not accepted"])
                   IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                   IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall (t :: * -> *) b.
Tree t =>
IOSLA (XIOState s) (t b) (t b) -> IOSLA (XIOState s) (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren IOSLA (XIOState s) XmlTree XmlTree
forall b c. IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none         -- remove contents of not accepted mimetype
                 )
          )
        where
        isAcceptedMimeType              :: [String] -> String -> Bool
        isAcceptedMimeType :: [String] -> String -> Bool
isAcceptedMimeType [String]
mts String
mt
            | [String] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [String]
mts
              Bool -> Bool -> Bool
||
              String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
mt                   = Bool
True
            | Bool
otherwise                 = ((String, String) -> Bool -> Bool)
-> Bool -> [(String, String)] -> Bool
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((String, String) -> (String, String) -> Bool -> Bool
matchMt (String, String)
mt') Bool
False ([(String, String)] -> Bool) -> [(String, String)] -> Bool
forall a b. (a -> b) -> a -> b
$ [(String, String)]
mts'
            where
            mt' :: (String, String)
mt'                         = String -> (String, String)
parseMt String
mt
            mts' :: [(String, String)]
mts'                        = (String -> (String, String)) -> [String] -> [(String, String)]
forall a b. (a -> b) -> [a] -> [b]
map String -> (String, String)
parseMt
                                          ([String] -> [(String, String)]) -> [String] -> [(String, String)]
forall a b. (a -> b) -> a -> b
$
                                          [String]
mts
            parseMt :: String -> (String, String)
parseMt                     = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'/')
                                          (String -> (String, String))
-> ((String, String) -> (String, String))
-> String
-> (String, String)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                          (String -> String) -> (String, String) -> (String, String)
forall b c d. (b -> c) -> (d, b) -> (d, c)
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (d, b) (d, c)
second (Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
1)
            matchMt :: (String, String) -> (String, String) -> Bool -> Bool
matchMt (String
ma,String
mi) (String
mas,String
mis) Bool
r = ( (String
ma String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
mas Bool -> Bool -> Bool
|| String
mas String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"*")
                                            Bool -> Bool -> Bool
&&
                                            (String
mi String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
mis Bool -> Bool -> Bool
|| String
mis String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
"*")
                                          )
                                          Bool -> Bool -> Bool
|| Bool
r
        parse :: (Bool, (Bool, (Bool, (Bool, (Bool, Bool)))))
-> IOSLA (XIOState s) XmlTree XmlTree
parse ( Bool
validate
              , ( Bool
substDTD
                , ( Bool
substHTML
                  , ( Bool
removeNoneXml
                    , ( Bool
withTagSoup'
                      , Bool
withExpat'
                      )))))
            | Bool -> Bool
not Bool
isXmlOrHtml           = if Bool
removeNoneXml
                                          then IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall (t :: * -> *) b.
Tree t =>
IOSLA (XIOState s) (t b) (t b) -> IOSLA (XIOState s) (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b) -> a (t b) (t b)
replaceChildren IOSLA (XIOState s) XmlTree XmlTree
forall b c. IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none             -- don't parse, if mime type is not XML nor HTML
                                          else IOSLA (XIOState s) XmlTree XmlTree
forall b. IOSLA (XIOState s) b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this                             -- but remove contents when option is set

            | Bool
isHtml
              Bool -> Bool -> Bool
||
              Bool
withTagSoup'              = SysConfig -> IOSLA (XIOState s) XmlTree XmlTree
forall s c. SysConfig -> IOStateArrow s c c
configSysVar (Selector XIOSysState Bool -> Bool -> SysConfig
forall s a. Selector s a -> a -> s -> s
setS Selector XIOSysState Bool
theLowerCaseNames Bool
isHtml)
                                          IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
-> IOSLA (XIOState s) XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
                                          IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
parseHtmlDocument                     -- parse as HTML or with tagsoup XML

            | Bool
isXml                     = if Bool
withExpat'
                                          then IOSLA (XIOState s) XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
parseXmlDocumentWithExpat
                                          else Bool -> Bool -> Bool -> Bool -> IOSLA (XIOState s) XmlTree XmlTree
forall s.
Bool -> Bool -> Bool -> Bool -> IOStateArrow s XmlTree XmlTree
parseXmlDocument
                                               Bool
validate
                                               Bool
substDTD
                                               Bool
substHTML
                                               Bool
validateWithRelax
                                                                                -- parse as XML
            | Bool
otherwise                 = IOSLA (XIOState s) XmlTree XmlTree
forall b. IOSLA (XIOState s) b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this                                  -- suppress warning

        checknamespaces :: (Bool, Bool) -> IOStateArrow s XmlTree XmlTree
checknamespaces (Bool
withNamespaces, Bool
withTagSoup')
            | Bool
withNamespaces
              Bool -> Bool -> Bool
&&
              Bool
withTagSoup'              = IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
andValidateNamespaces                 -- propagation is done in tagsoup

            | Bool
withNamespaces
              Bool -> Bool -> Bool
||
              Bool
validateWithRelax
              Bool -> Bool -> Bool
||
              Bool
validateWithXmlSchema
                                        = IOStateArrow s XmlTree XmlTree
forall s. IOStateArrow s XmlTree XmlTree
propagateAndValidateNamespaces        -- RelaxNG and XML Schema require correct namespaces

            | Bool
otherwise                 = IOStateArrow s XmlTree XmlTree
forall b. IOSLA (XIOState s) b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this

        canonicalize :: (Bool, (Bool, Bool)) -> a XmlTree XmlTree
canonicalize (Bool
preserveCmt, (Bool
canonicalize', Bool
withTagSoup'))
            | Bool
withTagSoup'              = a XmlTree XmlTree
forall b. a b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this                                  -- tagsoup already removes redundant stuff
            | Bool
validateWithRelax
              Bool -> Bool -> Bool
||
              Bool
validateWithXmlSchema     = a XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
canonicalizeAllNodes                  -- no comments in schema validation

            | Bool
canonicalize'
              Bool -> Bool -> Bool
&&
              Bool
preserveCmt               = a XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
canonicalizeForXPath
            | Bool
canonicalize'             = a XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
canonicalizeAllNodes
            | Bool
otherwise                 = a XmlTree XmlTree
forall b. a b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this

        relaxOrXmlSchema :: IOSLA (XIOState s0) XmlTree XmlTree
relaxOrXmlSchema
            | Bool
validateWithXmlSchema     = IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s0) b d)
-> IOSLA (XIOState s0) b c -> IOSLA (XIOState s0) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theXmlSchemaValidator
            | Bool
validateWithRelax         = IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree
forall b c s0. IOSArrow b c -> IOStateArrow s0 b c
withoutUserState (IOSArrow XmlTree XmlTree -> IOSLA (XIOState s0) XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree XmlTree
forall c b d.
(c -> IOSLA (XIOState s0) b d)
-> IOSLA (XIOState s0) b c -> IOSLA (XIOState s0) b d
forall (a :: * -> * -> *) c b d.
ArrowList a =>
(c -> a b d) -> a b c -> a b d
$< Selector XIOSysState (IOSArrow XmlTree XmlTree)
-> IOSLA (XIOState s0) XmlTree (IOSArrow XmlTree XmlTree)
forall c s b. Selector XIOSysState c -> IOStateArrow s b c
getSysVar Selector XIOSysState (IOSArrow XmlTree XmlTree)
theRelaxValidator
            | Bool
otherwise                 = IOSLA (XIOState s0) XmlTree XmlTree
forall b. IOSLA (XIOState s0) b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this

        whitespace :: (Bool, Bool) -> a XmlTree XmlTree
whitespace (Bool
removeWS, Bool
withTagSoup')
            | ( Bool
removeWS
                Bool -> Bool -> Bool
||
                Bool
validateWithXmlSchema                                           -- XML Schema does not like WS
              )
              Bool -> Bool -> Bool
&&
              Bool -> Bool
not Bool
withTagSoup'          = a XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
removeDocWhiteSpace                   -- tagsoup already removes whitespace
            | Bool
otherwise                 = a XmlTree XmlTree
forall b. a b b
forall (a :: * -> * -> *) b. ArrowList a => a b b
this

        isHtml :: Bool
isHtml                          = ( Bool -> Bool
not Bool
parseByMimeType Bool -> Bool -> Bool
&& Bool
parseHtml )  -- force HTML
                                          Bool -> Bool -> Bool
||
                                          ( Bool
parseByMimeType Bool -> Bool -> Bool
&& String -> Bool
isHtmlMimeType String
mimeType )

        isXml :: Bool
isXml                           = ( Bool -> Bool
not Bool
parseByMimeType Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
parseHtml )
                                          Bool -> Bool -> Bool
||
                                          ( Bool
parseByMimeType
                                            Bool -> Bool -> Bool
&&
                                            ( String -> Bool
isXmlMimeType String
mimeType
                                              Bool -> Bool -> Bool
||
                                              String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
mimeType
                                            )                                   -- mime type is XML or not known
                                          )

        isXmlOrHtml :: Bool
isXmlOrHtml     = Bool
isHtml Bool -> Bool -> Bool
|| Bool
isXml

-- ------------------------------------------------------------

-- |
-- the arrow version of 'readDocument', the arrow input is the source URI

readFromDocument :: SysConfigList -> IOStateArrow s String XmlTree
readFromDocument :: forall s. SysConfigList -> IOStateArrow s String XmlTree
readFromDocument SysConfigList
config
    = IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String XmlTree
forall b c.
IOSLA (XIOState s) b (IOSLA (XIOState s) b c)
-> IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA ( (String -> IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
forall b c. (b -> c) -> IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr ((String -> IOSLA (XIOState s) String XmlTree)
 -> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree))
-> (String -> IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
forall a b. (a -> b) -> a -> b
$ SysConfigList -> String -> IOSLA (XIOState s) String XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument SysConfigList
config )

-- ------------------------------------------------------------

-- |
-- read a document that is stored in a normal Haskell String
--
-- the same function as readDocument, but the parameter forms the input.
-- All options available for 'readDocument' are applicable for readString,
-- except input encoding options.
--
-- Encoding: No decoding is done, the String argument is taken as Unicode string
-- All decoding must be done before calling readString, even if the
-- XML document contains an encoding spec.

readString :: SysConfigList -> String -> IOStateArrow s b XmlTree
readString :: forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readString SysConfigList
config String
content
    = SysConfigList -> String -> IOStateArrow s b XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readDocument SysConfigList
config (String
stringProtocol String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
content)

-- ------------------------------------------------------------

-- |
-- the arrow version of 'readString', the arrow input is the source URI

readFromString :: SysConfigList -> IOStateArrow s String XmlTree
readFromString :: forall s. SysConfigList -> IOStateArrow s String XmlTree
readFromString SysConfigList
config
    = IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String XmlTree
forall b c.
IOSLA (XIOState s) b (IOSLA (XIOState s) b c)
-> IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. ArrowList a => a b (a b c) -> a b c
applyA ( (String -> IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
forall b c. (b -> c) -> IOSLA (XIOState s) b c
forall (a :: * -> * -> *) b c. Arrow a => (b -> c) -> a b c
arr ((String -> IOSLA (XIOState s) String XmlTree)
 -> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree))
-> (String -> IOSLA (XIOState s) String XmlTree)
-> IOSLA (XIOState s) String (IOSLA (XIOState s) String XmlTree)
forall a b. (a -> b) -> a -> b
$ SysConfigList -> String -> IOSLA (XIOState s) String XmlTree
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
readString SysConfigList
config )

-- ------------------------------------------------------------

-- |
-- parse a string as HTML content, substitute all HTML entity refs and canonicalize tree.
-- (substitute char refs, ...). Errors are ignored.
--
-- This arrow delegates all work to the parseHtmlContent parser in module HtmlParser.
--
-- This is a simpler version of 'readFromString' without any options,
-- but it does not run in the IO monad.

hread :: ArrowXml a => a String XmlTree
hread :: forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
hread
    = LA String XmlTree -> a String XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA (LA String XmlTree -> a String XmlTree)
-> LA String XmlTree -> a String XmlTree
forall a b. (a -> b) -> a -> b
$
      LA String XmlTree
forall (a :: * -> * -> *). ArrowList a => a String XmlTree
PI.hread                              -- substHtmlEntityRefs is done in parser
      LA String XmlTree -> LA XmlTree XmlTree -> LA String XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>                                   -- as well as subst HTML char refs
      [IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)]
-> LA XmlTree XmlTree
forall b c.
[IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))]
-> LA (NTree b) (NTree b)
editNTreeA [LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isError LA XmlTree XmlTree
-> LA XmlTree XmlTree
-> IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)
forall a b. a -> b -> IfThen a b
:-> LA XmlTree XmlTree
forall b c. LA b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none]         -- ignores all errors
      LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
canonicalizeContents                  -- combine text nodes, substitute char refs
                                            -- comments are not removed

-- | like hread, but accepts a whole document, not a HTML content

hreadDoc :: ArrowXml a => a String XmlTree
hreadDoc :: forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
hreadDoc
    = LA String XmlTree -> a String XmlTree
forall (a :: * -> * -> *) b c. ArrowList a => LA b c -> a b c
fromLA (LA String XmlTree -> a String XmlTree)
-> LA String XmlTree -> a String XmlTree
forall a b. (a -> b) -> a -> b
$
      [LA String XmlTree] -> [LA String XmlTree] -> LA String XmlTree
forall n. [LA n XmlTree] -> [LA n XmlTree] -> LA n XmlTree
forall (a :: * -> * -> *) n.
ArrowXml a =>
[a n XmlTree] -> [a n XmlTree] -> a n XmlTree
root [] [LA String XmlTree
forall (a :: * -> * -> *). ArrowList a => a String XmlTree
PI.hreadDoc]                 -- substHtmlEntityRefs is done in parser
      LA String XmlTree -> LA XmlTree XmlTree -> LA String XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>                                   -- as well as subst HTML char refs
      [IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)]
-> LA XmlTree XmlTree
forall b c.
[IfThen (LA (NTree b) c) (LA (NTree b) (NTree b))]
-> LA (NTree b) (NTree b)
editNTreeA [LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowXml a => a XmlTree XmlTree
isError LA XmlTree XmlTree
-> LA XmlTree XmlTree
-> IfThen (LA XmlTree XmlTree) (LA XmlTree XmlTree)
forall a b. a -> b -> IfThen a b
:-> LA XmlTree XmlTree
forall b c. LA b c
forall (a :: * -> * -> *) b c. ArrowList a => a b c
none]         -- ignores all errors
      LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      LA XmlTree XmlTree
forall (a :: * -> * -> *). ArrowList a => a XmlTree XmlTree
canonicalizeForXPath                  -- remove DTD spec and text in content of root node
                                            -- and do a canonicalizeContents
      LA XmlTree XmlTree -> LA XmlTree XmlTree -> LA XmlTree XmlTree
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
      LA XmlTree XmlTree
forall (t :: * -> *) b. Tree t => LA (t b) (t b)
forall (a :: * -> * -> *) (t :: * -> *) b.
(ArrowTree a, Tree t) =>
a (t b) (t b)
getChildren
      
-- ------------------------------------------------------------

-- |
-- parse a string as XML CONTENT, (no xml decl or doctype decls are allowed),
-- substitute all predefined XML entity refs and canonicalize tree
-- This xread arrow delegates all work to the xread parser function in module XmlParsec

xread :: ArrowXml a => a String XmlTree
xread :: forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
xread = a String XmlTree
forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
PI.xreadCont

-- |
-- a more general version of xread which
-- parses a whole document including a prolog
-- (xml decl, doctype decl) and processing
-- instructions. Doctype decls remain uninterpreted,
-- but are in the list of results trees.

xreadDoc :: ArrowXml a => a String XmlTree
xreadDoc :: forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
xreadDoc = a String XmlTree
forall (a :: * -> * -> *). ArrowXml a => a String XmlTree
PI.xreadDoc

{- -- the old version, where the parser does not subst char refs and cdata
xread                   = root [] [parseXmlContent]       -- substXmlEntityRefs is done in parser
                          >>>
                          canonicalizeContents
                          >>>
                          getChildren
-- -}

-- ------------------------------------------------------------