module Language.Haskell.Lexer
  ( PosToken
  , Token(..)
  , lexerPass0
  , lexerPass0'
  , lexerPass1
  , rmSpace
  , layoutPre
  , module Language.Haskell.Lexer.Position
  ) where

import Language.Haskell.Lexer.Lex(haskellLex)
import Language.Haskell.Lexer.Utils
import Language.Haskell.Lexer.Layout(layoutPre,PosToken)
import Language.Haskell.Lexer.Position
import Data.List(mapAccumL)

default(Int)

-- | The function 'lexerPass1' handles the part of lexical analysis that
-- can be done independently of the parser---the tokenization and the
-- addition of the extra layout tokens \<n\> and {n}, as specified in
-- section 9.3 of the revised Haskell 98 Report.
lexerPass1 :: String -> [PosToken]
lexerPass1 :: String -> [PosToken]
lexerPass1 = [PosToken] -> [PosToken]
lexerPass1Only ([PosToken] -> [PosToken])
-> (String -> [PosToken]) -> String -> [PosToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [PosToken]
lexerPass0

lexerPass1Only :: [PosToken] -> [PosToken]
lexerPass1Only :: [PosToken] -> [PosToken]
lexerPass1Only = [PosToken] -> [PosToken]
layoutPre ([PosToken] -> [PosToken])
-> ([PosToken] -> [PosToken]) -> [PosToken] -> [PosToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [PosToken] -> [PosToken]
rmSpace

-- | Remove token that are not meaningful (e.g., white space and comments).
rmSpace :: [PosToken] -> [PosToken]
rmSpace :: [PosToken] -> [PosToken]
rmSpace = (PosToken -> Bool) -> [PosToken] -> [PosToken]
forall a. (a -> Bool) -> [a] -> [a]
filter (Token -> Bool
notWhite(Token -> Bool) -> (PosToken -> Token) -> PosToken -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
.PosToken -> Token
forall a b. (a, b) -> a
fst)

notWhite :: Token -> Bool
notWhite :: Token -> Bool
notWhite Token
t = Token
tToken -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/=Token
Whitespace Bool -> Bool -> Bool
&&
             Token
tToken -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/=Token
Commentstart Bool -> Bool -> Bool
&& Token
tToken -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/=Token
Comment Bool -> Bool -> Bool
&&
             Token
tToken -> Token -> Bool
forall a. Eq a => a -> a -> Bool
/=Token
NestedComment

-- | Tokenize and add position information.  Preserves white space,
-- and does not insert extra tokens due to layout.
lexerPass0 :: String -> [PosToken]
lexerPass0 :: String -> [PosToken]
lexerPass0 = Pos -> String -> [PosToken]
lexerPass0' Pos
startPos

-- | Same as 'lexerPass0', except that it uses the given start position.
lexerPass0' :: Pos -> String -> [PosToken]
lexerPass0' :: Pos -> String -> [PosToken]
lexerPass0' Pos
pos0 = [(Token, String)] -> [PosToken]
forall {a}. [(a, String)] -> [(a, (Pos, String))]
addPos ([(Token, String)] -> [PosToken])
-> (String -> [(Token, String)]) -> String -> [PosToken]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [(Token, String)]
haskellLex (String -> [(Token, String)])
-> (String -> String) -> String -> [(Token, String)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
rmcr
  where
    addPos :: [(a, String)] -> [(a, (Pos, String))]
addPos = (Pos, [(a, (Pos, String))]) -> [(a, (Pos, String))]
forall a b. (a, b) -> b
snd ((Pos, [(a, (Pos, String))]) -> [(a, (Pos, String))])
-> ([(a, String)] -> (Pos, [(a, (Pos, String))]))
-> [(a, String)]
-> [(a, (Pos, String))]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Pos -> (a, String) -> (Pos, (a, (Pos, String))))
-> Pos -> [(a, String)] -> (Pos, [(a, (Pos, String))])
forall (t :: * -> *) s a b.
Traversable t =>
(s -> a -> (s, b)) -> s -> t a -> (s, t b)
mapAccumL Pos -> (a, String) -> (Pos, (a, (Pos, String)))
forall {a}. Pos -> (a, String) -> (Pos, (a, (Pos, String)))
pos Pos
pos0

    pos :: Pos -> (a, String) -> (Pos, (a, (Pos, String)))
pos Pos
p (a
t,String
s) = {-seq p'-} (Pos
p',(a
t,(Pos
p,String
s)))
        where p' :: Pos
p' = Pos -> String -> Pos
nextPos Pos
p String
s
--      where s = reverse r


-- | Since #nextPos# examines one character at a time, it will increase the line
-- number by 2 if it sees \CR\LF, which can happen when reading DOS files on
-- a Unix like system.
-- Since the extra \CR characters can cause trouble later as well, we choose
-- to simply remove them here.
rmcr :: String -> String
rmcr :: String -> String
rmcr (Char
'\CR':Char
'\LF':String
s) = Char
'\LF'Char -> String -> String
forall a. a -> [a] -> [a]
:String -> String
rmcr String
s
rmcr (Char
c:String
s) = Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String -> String
rmcr String
s
rmcr String
"" = String
""