--  C->Haskell Compiler: Lexer for CHS Files
--
--  Author : Manuel M T Chakravarty
--  Created: 13 August 99
--
--  Version $Revision: 1.1 $ from $Date: 2004/11/21 21:05:35 $
--
--  Copyright (c) [1999..2004] Manuel M T Chakravarty
--
--  This file is free software; you can redistribute it and/or modify
--  it under the terms of the GNU General Public License as published by
--  the Free Software Foundation; either version 2 of the License, or
--  (at your option) any later version.
--
--  This file is distributed in the hope that it will be useful,
--  but WITHOUT ANY WARRANTY; without even the implied warranty of
--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--  GNU General Public License for more details.
--
--- DESCRIPTION ---------------------------------------------------------------
--
--  Lexer for CHS files; the tokens are only partially recognised.
--
--- DOCU ----------------------------------------------------------------------
--
--  language: Haskell 98
--
--  * CHS files are assumed to be Haskell 98 files that include C2HS binding
--    hooks.
--
--  * Haskell code is not tokenised, but binding hooks (delimited by `{#'and 
--    `#}') are analysed.  Therefore the lexer operates in two states
--    (realised as two lexer coupled by meta actions) depending on whether
--    Haskell code or a binding hook is currently read.  The lexer reading
--    Haskell code is called `base lexer'; the other one, `binding-hook
--    lexer'.  In addition, there is a inline-c lexer, which, as the
--    binding-hook lexer, can be triggered from the base lexer.
--
--  * Base lexer:
--
--      haskell -> (inline \\ special)*
--               | special \\ `"'
--               | comment
--               | nested
--               | hstring
--               | '{#'
--               | cpp
--      special -> `(' | `{' | `-' | `"'
--      ctrl    -> `\n' | `\f' | `\r' | `\t' | `\v'
--
--      inline  -> any \\ ctrl
--      any     -> '\0'..'\255'
--
--    Within the base lexer control codes appear as separate tokens in the
--    token list.
--
--    NOTE: It is important that `{' is an extra lexeme and not added as an
--          optional component at the end of the first alternative for
--          `haskell'.  Otherwise, the principle of the longest match will
--          divide `foo {#' into the tokens `foo {' and `#' instead of `foo '
--          and `{#'.
--
--    One line comments are handled by
--
--      comment -> `--' (any \\ `\n')* `\n'
--
--    and nested comments by
--
--      nested -> `{-' any* `-}'
--
--    where `any*' may contain _balanced_ occurrences of `{-' and `-}'.
--
--      hstring -> `"' inhstr* `"'
--      inhstr  -> ` '..`\127' \\ `"'
--               | `\"'
--
--    Pre-precessor directives as well as the switch to inline-C code are
--    formed as follows:
--
--      cpp     -> `\n#' (inline | `\t')* `\n'
--               | `\n#c' (' ' | '\t')* `\n'
--
--    We allow whitespace between the `#' and the actual directive, but in `#c'
--    and `#endc' the directive must immediately follow the `#'.  This might
--    be regarded as a not entirely orthogonal design, but simplifies matters
--    especially for `#endc'.
--
--  * On encountering the lexeme `{#', a meta action in the base lexer
--    transfers control to the following binding-hook lexer:
--
--      ident       -> letter (letter | digit | `\'')*
--                   | `\'' letter (letter | digit)* `\''
--      reservedid  -> `as' | `call' | `class' | `context' | `deriving' 
--                   | `enum' | `foreign' | `fun' | `get' | `lib' 
--                   | `newtype' | `pointer' | `prefix' | `pure' | `set'
--                   | `sizeof' | `stable' | `type' | `underscoreToCase' 
--                   | `unsafe' | `with' | 'lock' | 'unlock'
--      reservedsym -> `{#' | `#}' | `{' | `}' | `,' | `.' | `->' | `=' 
--                   | `=>' | '-' | `*' | `&' | `^'
--      string      -> `"' instr* `"'
--      verbhs      -> `\`' instr* `\''
--      instr       -> ` '..`\127' \\ `"'
--      comment     -> `--' (any \\ `\n')* `\n'
--
--    Control characters, white space, and comments are discarded in the
--    binding-hook lexer.  Nested comments are not allowed in a binding hook.
--    Identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords.
--
--  * In the binding-hook lexer, the lexeme `#}' transfers control back to the 
--    base lexer.  An occurence of the lexeme `{#' inside the binding-hook
--    lexer triggers an error.  The symbol `{#' is not explcitly represented
--    in the resulting token stream.  However, the occurrence of a token
--    representing one of the reserved identifiers `call', `context', `enum',
--    and `field' marks the start of a binding hook.  Strictly speaking, `#}'
--    need also not occur in the token stream, as the next `haskell' token
--    marks a hook's end.  It is, however, useful for producing accurate error 
--    messages (in case an hook is closed to early) to have a token
--    representing `#}'.
--
--  * The rule `ident' describes Haskell identifiers, but without
--    distinguishing between variable and constructor identifers (ie, those
--    starting with a lowercase and those starting with an uppercase letter).
--    However, we use it also to scan C identifiers; although, strictly
--    speaking, it is too general for them.  In the case of C identifiers,
--    this should not have any impact on the range of descriptions accepted by
--    the tool, as illegal identifier will never occur in a C header file that
--    is accepted by the C lexer.  In the case of Haskell identifiers, a
--    confusion between variable and constructor identifiers will be noted by
--    the Haskell compiler translating the code generated by c2hs.  Moreover,
--    identifiers can be enclosed in single quotes to avoid collision with
--    C->Haskell keywords, but those may not contain apostrophes.
--
--  * Any line starting with the character `#' is regarded to be a C
--    preprocessor directive.  With the exception of `#c' and `#endc', which
--    delimit a set of lines containing inline C code.  Hence, in the base
--    lexer, the lexeme `#c' triggers a meta action transferring control to the
--    following inline-C lexer:
--
--      c  -> inline* \\ `\n#endc'
--
--    We do neither treat C strings nor C comments specially.  Hence, if the
--    string "\n#endc" occurs in a comment, we will mistakenly regard it as
--    the end of the inline C code.  Note that the problem cannot happen with
--    strings, as C does not permit strings that extend over multiple lines.
--    At the moment, it just seems not to be worth the effort required to
--    treat this situation more accurately.
--
--    The inline-C lexer also doesn't handle pre-processor directives
--    specially.  Hence, structural pre-processor directives (namely,
--    conditionals) may occur within inline-C code only properly nested.
--
--  Shortcomings
--  ~~~~~~~~~~~~
--  Some lexemes that include single and double quote characters are not lexed
--  correctly.  See the implementation comment at `haskell' for details.
--
--
--- TODO ----------------------------------------------------------------------
--
--  * In `haskell', the case of a single `"' (without a matching second one)
--    is caught by an eplicit error raising rule.  This shouldn't be
--    necessary, but for some strange reason, the lexer otherwise hangs when a 
--    single `"' appears in the input.
--
--  * Comments in the "gap" of a string are not yet supported.
--

module CHSLexer (CHSToken(..), lexCHS) 
where 

import Data.List         ((\\))
import Data.Char         (isDigit)
import Control.Monad     (liftM)
import Numeric   (readDec, readOct, readHex)

import Position  (Position(..), Pos(posOf), incPos, retPos, tabPos)
import Errors    (ErrorLvl(..), Error, makeError)
import UNames    (NameSupply, Name, names)
import Idents    (Ident, lexemeToIdent, identToLexeme)
import Lexers    (Regexp, Lexer, Action, epsilon, char, (+>), lexaction,
                  lexactionErr, lexmeta, (>|<), (>||<), ctrlLexer, star, plus,
                  quest, alt, string, LexerState, execLexer)

import C2HSState (CST, raise, raiseError, nop, getNameSupply) 


-- token definition
-- ----------------

-- possible tokens (EXPORTED)
--
data CHSToken = CHSTokArrow   Position          -- `->'
              | CHSTokDArrow  Position          -- `=>'
              | CHSTokDot     Position          -- `.'
              | CHSTokComma   Position          -- `,'
              | CHSTokEqual   Position          -- `='
              | CHSTokMinus   Position          -- `-'
              | CHSTokStar    Position          -- `*'
              | CHSTokAmp     Position          -- `&'
              | CHSTokHat     Position          -- `^'
              | CHSTokLBrace  Position          -- `{'
              | CHSTokRBrace  Position          -- `}'
              | CHSTokLParen  Position          -- `('
              | CHSTokRParen  Position          -- `)'
              | CHSTokEndHook Position          -- `#}'
              | CHSTokAs      Position          -- `as'
              | CHSTokCall    Position          -- `call'
              | CHSTokClass   Position          -- `class'
              | CHSTokContext Position          -- `context'
              | CHSTokDerive  Position          -- `deriving'
              | CHSTokEnum    Position          -- `enum'
              | CHSTokForeign Position          -- `foreign'
              | CHSTokFun     Position          -- `fun'
              | CHSTokGet     Position          -- `get'
              | CHSTokImport  Position          -- `import'
              | CHSTokLib     Position          -- `lib'
              | CHSTokNewtype Position          -- `newtype'
              | CHSTokPointer Position          -- `pointer'
              | CHSTokPrefix  Position          -- `prefix'
              | CHSTokPure    Position          -- `pure'
              | CHSTokQualif  Position          -- `qualified'
              | CHSTokSet     Position          -- `set'
              | CHSTokSizeof  Position          -- `sizeof'
              | CHSTokStable  Position          -- `stable'
              | CHSTokType    Position          -- `type'
              | CHSTok_2Case  Position          -- `underscoreToCase'
              | CHSTokUnsafe  Position          -- `unsafe'
              | CHSTokWith    Position          -- `with'
              | CHSTokLock    Position          -- `lock'
              | CHSTokNolock  Position          -- `nolock'
              | CHSTokString  Position String   -- string 
              | CHSTokHSVerb  Position String   -- verbatim Haskell (`...')
              | CHSTokIdent   Position Ident    -- identifier
              | CHSTokHaskell Position String   -- verbatim Haskell code
              | CHSTokCPP     Position String   -- pre-processor directive
              | CHSTokLine    Position          -- line pragma
              | CHSTokC       Position String   -- verbatim C code
              | CHSTokCtrl    Position Char     -- control code
              | CHSTokPragma  Position          -- '{-# LANGUAGE' language pragma begin
              | CHSTokPragEnd Position          -- '#-}' language pragma end

instance Pos CHSToken where
  posOf :: CHSToken -> Position
posOf (CHSTokArrow   pos :: Position
pos  ) = Position
pos
  posOf (CHSTokDArrow  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokDot     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokComma   pos :: Position
pos  ) = Position
pos
  posOf (CHSTokEqual   pos :: Position
pos  ) = Position
pos
  posOf (CHSTokMinus   pos :: Position
pos  ) = Position
pos
  posOf (CHSTokStar    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokAmp     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokHat     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokLBrace  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokRBrace  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokLParen  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokRParen  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokEndHook pos :: Position
pos  ) = Position
pos
  posOf (CHSTokAs      pos :: Position
pos  ) = Position
pos
  posOf (CHSTokCall    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokClass   pos :: Position
pos  ) = Position
pos
  posOf (CHSTokContext pos :: Position
pos  ) = Position
pos
  posOf (CHSTokDerive  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokEnum    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokForeign pos :: Position
pos  ) = Position
pos
  posOf (CHSTokFun     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokGet     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokImport  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokLib     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokNewtype pos :: Position
pos  ) = Position
pos
  posOf (CHSTokPointer pos :: Position
pos  ) = Position
pos
  posOf (CHSTokPrefix  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokPure    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokQualif  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokSet     pos :: Position
pos  ) = Position
pos
  posOf (CHSTokSizeof  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokStable  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokType    pos :: Position
pos  ) = Position
pos
  posOf (CHSTok_2Case  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokUnsafe  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokWith    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokLock    pos :: Position
pos  ) = Position
pos
  posOf (CHSTokNolock  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokString  pos :: Position
pos _) = Position
pos
  posOf (CHSTokHSVerb  pos :: Position
pos _) = Position
pos
  posOf (CHSTokIdent   pos :: Position
pos _) = Position
pos
  posOf (CHSTokHaskell pos :: Position
pos _) = Position
pos
  posOf (CHSTokCPP     pos :: Position
pos _) = Position
pos
  posOf (CHSTokC       pos :: Position
pos _) = Position
pos
  posOf (CHSTokCtrl    pos :: Position
pos _) = Position
pos
  posOf (CHSTokPragma  pos :: Position
pos  ) = Position
pos
  posOf (CHSTokPragEnd pos :: Position
pos  ) = Position
pos

instance Eq CHSToken where
  (CHSTokArrow    _  ) == :: CHSToken -> CHSToken -> Bool
== (CHSTokArrow    _  ) = Bool
True
  (CHSTokDArrow   _  ) == (CHSTokDArrow   _  ) = Bool
True
  (CHSTokDot      _  ) == (CHSTokDot      _  ) = Bool
True
  (CHSTokComma    _  ) == (CHSTokComma    _  ) = Bool
True
  (CHSTokEqual    _  ) == (CHSTokEqual    _  ) = Bool
True
  (CHSTokMinus    _  ) == (CHSTokMinus    _  ) = Bool
True
  (CHSTokStar     _  ) == (CHSTokStar     _  ) = Bool
True
  (CHSTokAmp      _  ) == (CHSTokAmp      _  ) = Bool
True
  (CHSTokHat      _  ) == (CHSTokHat      _  ) = Bool
True
  (CHSTokLBrace   _  ) == (CHSTokLBrace   _  ) = Bool
True
  (CHSTokRBrace   _  ) == (CHSTokRBrace   _  ) = Bool
True
  (CHSTokLParen   _  ) == (CHSTokLParen   _  ) = Bool
True
  (CHSTokRParen   _  ) == (CHSTokRParen   _  ) = Bool
True
  (CHSTokEndHook  _  ) == (CHSTokEndHook  _  ) = Bool
True
  (CHSTokAs       _  ) == (CHSTokAs       _  ) = Bool
True
  (CHSTokCall     _  ) == (CHSTokCall     _  ) = Bool
True
  (CHSTokClass    _  ) == (CHSTokClass    _  ) = Bool
True
  (CHSTokContext  _  ) == (CHSTokContext  _  ) = Bool
True
  (CHSTokDerive   _  ) == (CHSTokDerive   _  ) = Bool
True
  (CHSTokEnum     _  ) == (CHSTokEnum     _  ) = Bool
True
  (CHSTokForeign  _  ) == (CHSTokForeign  _  ) = Bool
True
  (CHSTokFun      _  ) == (CHSTokFun      _  ) = Bool
True
  (CHSTokGet      _  ) == (CHSTokGet      _  ) = Bool
True
  (CHSTokImport   _  ) == (CHSTokImport   _  ) = Bool
True
  (CHSTokLib      _  ) == (CHSTokLib      _  ) = Bool
True
  (CHSTokNewtype  _  ) == (CHSTokNewtype  _  ) = Bool
True
  (CHSTokPointer  _  ) == (CHSTokPointer  _  ) = Bool
True
  (CHSTokPrefix   _  ) == (CHSTokPrefix   _  ) = Bool
True
  (CHSTokPure     _  ) == (CHSTokPure     _  ) = Bool
True
  (CHSTokQualif   _  ) == (CHSTokQualif   _  ) = Bool
True
  (CHSTokSet      _  ) == (CHSTokSet      _  ) = Bool
True
  (CHSTokSizeof   _  ) == (CHSTokSizeof   _  ) = Bool
True
  (CHSTokStable   _  ) == (CHSTokStable   _  ) = Bool
True
  (CHSTokType     _  ) == (CHSTokType     _  ) = Bool
True
  (CHSTok_2Case   _  ) == (CHSTok_2Case   _  ) = Bool
True
  (CHSTokUnsafe   _  ) == (CHSTokUnsafe   _  ) = Bool
True
  (CHSTokWith     _  ) == (CHSTokWith     _  ) = Bool
True
  (CHSTokLock     _  ) == (CHSTokLock     _  ) = Bool
True
  (CHSTokNolock   _  ) == (CHSTokNolock   _  ) = Bool
True
  (CHSTokString   _ _) == (CHSTokString   _ _) = Bool
True
  (CHSTokHSVerb   _ _) == (CHSTokHSVerb   _ _) = Bool
True
  (CHSTokIdent    _ _) == (CHSTokIdent    _ _) = Bool
True
  (CHSTokHaskell  _ _) == (CHSTokHaskell  _ _) = Bool
True
  (CHSTokCPP      _ _) == (CHSTokCPP      _ _) = Bool
True
  (CHSTokC        _ _) == (CHSTokC        _ _) = Bool
True
  (CHSTokCtrl     _ _) == (CHSTokCtrl     _ _) = Bool
True
  (CHSTokPragma   _  ) == (CHSTokPragma   _  ) = Bool
True
  (CHSTokPragEnd  _  ) == (CHSTokPragEnd  _  ) = Bool
True
  _                    == _                    = Bool
False

instance Show CHSToken where
  showsPrec :: Int -> CHSToken -> ShowS
showsPrec _ (CHSTokArrow   _  ) = String -> ShowS
showString "->"
  showsPrec _ (CHSTokDArrow  _  ) = String -> ShowS
showString "=>"
  showsPrec _ (CHSTokDot     _  ) = String -> ShowS
showString "."
  showsPrec _ (CHSTokComma   _  ) = String -> ShowS
showString ","
  showsPrec _ (CHSTokEqual   _  ) = String -> ShowS
showString "="
  showsPrec _ (CHSTokMinus   _  ) = String -> ShowS
showString "-"
  showsPrec _ (CHSTokStar    _  ) = String -> ShowS
showString "*"
  showsPrec _ (CHSTokAmp     _  ) = String -> ShowS
showString "&"
  showsPrec _ (CHSTokHat     _  ) = String -> ShowS
showString "^"
  showsPrec _ (CHSTokLBrace  _  ) = String -> ShowS
showString "{"
  showsPrec _ (CHSTokRBrace  _  ) = String -> ShowS
showString "}"
  showsPrec _ (CHSTokLParen  _  ) = String -> ShowS
showString "("
  showsPrec _ (CHSTokRParen  _  ) = String -> ShowS
showString ")"
  showsPrec _ (CHSTokEndHook _  ) = String -> ShowS
showString "#}"
  showsPrec _ (CHSTokAs      _  ) = String -> ShowS
showString "as"
  showsPrec _ (CHSTokCall    _  ) = String -> ShowS
showString "call"
  showsPrec _ (CHSTokClass   _  ) = String -> ShowS
showString "class"
  showsPrec _ (CHSTokContext _  ) = String -> ShowS
showString "context"
  showsPrec _ (CHSTokDerive  _  ) = String -> ShowS
showString "deriving"
  showsPrec _ (CHSTokEnum    _  ) = String -> ShowS
showString "enum"
  showsPrec _ (CHSTokForeign _  ) = String -> ShowS
showString "foreign"
  showsPrec _ (CHSTokFun     _  ) = String -> ShowS
showString "fun"
  showsPrec _ (CHSTokGet     _  ) = String -> ShowS
showString "get"
  showsPrec _ (CHSTokImport  _  ) = String -> ShowS
showString "import"
  showsPrec _ (CHSTokLib     _  ) = String -> ShowS
showString "lib"
  showsPrec _ (CHSTokNewtype _  ) = String -> ShowS
showString "newtype"
  showsPrec _ (CHSTokPointer _  ) = String -> ShowS
showString "pointer"
  showsPrec _ (CHSTokPrefix  _  ) = String -> ShowS
showString "prefix"
  showsPrec _ (CHSTokPure    _  ) = String -> ShowS
showString "pure"
  showsPrec _ (CHSTokQualif  _  ) = String -> ShowS
showString "qualified"
  showsPrec _ (CHSTokSet     _  ) = String -> ShowS
showString "set"
  showsPrec _ (CHSTokSizeof  _  ) = String -> ShowS
showString "sizeof"
  showsPrec _ (CHSTokStable  _  ) = String -> ShowS
showString "stable"
  showsPrec _ (CHSTokType    _  ) = String -> ShowS
showString "type"
  showsPrec _ (CHSTok_2Case  _  ) = String -> ShowS
showString "underscoreToCase"
  showsPrec _ (CHSTokUnsafe  _  ) = String -> ShowS
showString "unsafe"
  showsPrec _ (CHSTokWith    _  ) = String -> ShowS
showString "with"
  showsPrec _ (CHSTokLock    _  ) = String -> ShowS
showString "lock"
  showsPrec _ (CHSTokNolock  _  ) = String -> ShowS
showString "nolock"
  showsPrec _ (CHSTokString  _ s :: String
s) = String -> ShowS
showString ("\"" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ "\"")
  showsPrec _ (CHSTokHSVerb  _ s :: String
s) = String -> ShowS
showString ("`" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
s String -> ShowS
forall a. [a] -> [a] -> [a]
++ "'")
  showsPrec _ (CHSTokIdent   _ i :: Ident
i) = (String -> ShowS
showString (String -> ShowS) -> (Ident -> String) -> Ident -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Ident -> String
identToLexeme) Ident
i
  showsPrec _ (CHSTokHaskell _ s :: String
s) = String -> ShowS
showString String
s
  showsPrec _ (CHSTokCPP     _ s :: String
s) = String -> ShowS
showString String
s
  showsPrec _ (CHSTokC       _ s :: String
s) = String -> ShowS
showString String
s
  showsPrec _ (CHSTokCtrl    _ c :: Char
c) = Char -> ShowS
showChar Char
c
  showsPrec _ (CHSTokPragma  _  ) = String -> ShowS
showString "{-# LANGUAGE"
  showsPrec _ (CHSTokPragEnd _  ) = String -> ShowS
showString "#-}"


-- lexer state
-- -----------

-- state threaded through the lexer
--
data CHSLexerState = CHSLS {
                       CHSLexerState -> Int
nestLvl :: Int,   -- nesting depth of nested comments
                       CHSLexerState -> Bool
inHook  :: Bool,  -- within a binding hook?
                       CHSLexerState -> [Name]
namesup :: [Name] -- supply of unique names
                     }

-- initial state
--
initialState :: CST s CHSLexerState
initialState :: CST s CHSLexerState
initialState  = do
                  [Name]
namesup <- (NameSupply -> [Name])
-> PreCST SwitchBoard s NameSupply -> PreCST SwitchBoard s [Name]
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM NameSupply -> [Name]
names PreCST SwitchBoard s NameSupply
forall e s. PreCST e s NameSupply
getNameSupply
                  CHSLexerState -> CST s CHSLexerState
forall (m :: * -> *) a. Monad m => a -> m a
return (CHSLexerState -> CST s CHSLexerState)
-> CHSLexerState -> CST s CHSLexerState
forall a b. (a -> b) -> a -> b
$ CHSLS :: Int -> Bool -> [Name] -> CHSLexerState
CHSLS {
                             nestLvl :: Int
nestLvl = 0,
                             inHook :: Bool
inHook  = Bool
False,
                             namesup :: [Name]
namesup = [Name]
namesup
                           }

-- raise an error if the given state is not a final state
--
assertFinalState :: Position -> CHSLexerState -> CST s ()
assertFinalState :: Position -> CHSLexerState -> CST s ()
assertFinalState pos :: Position
pos CHSLS {nestLvl :: CHSLexerState -> Int
nestLvl = Int
nestLvl, inHook :: CHSLexerState -> Bool
inHook = Bool
inHook} 
  | Int
nestLvl Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0 = Position -> [String] -> CST s ()
forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos ["Unexpected end of file!",
                                  "Unclosed nested comment."]
  | Bool
inHook      = Position -> [String] -> CST s ()
forall e s. Position -> [String] -> PreCST e s ()
raiseError Position
pos ["Unexpected end of file!",
                                  "Unclosed binding hook."]
  | Bool
otherwise   = CST s ()
forall e s. PreCST e s ()
nop

-- lexer and action type used throughout this specification
--
type CHSLexer  = Lexer  CHSLexerState CHSToken
type CHSAction = Action               CHSToken
type CHSRegexp = Regexp CHSLexerState CHSToken

-- for actions that need a new unique name
--
infixl 3 `lexactionName`
lexactionName :: CHSRegexp 
              -> (String -> Position -> Name -> CHSToken) 
              -> CHSLexer
re :: CHSRegexp
re lexactionName :: CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` action :: String -> Position -> Name -> CHSToken
action = CHSRegexp
re CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall a a.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action'
  where
    action' :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState, Maybe a)
action' str :: String
str pos :: Position
pos state :: CHSLexerState
state = let name :: Name
name:ns :: [Name]
ns = CHSLexerState -> [Name]
namesup CHSLexerState
state
                            in
                            (Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (String -> Position -> Name -> CHSToken
action String
str Position
pos Name
name),
                             Position -> Int -> Position
incPos Position
pos (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
str),
                             CHSLexerState
state {namesup :: [Name]
namesup = [Name]
ns},
                             Maybe a
forall a. Maybe a
Nothing)


-- lexical specification
-- ---------------------

-- the lexical definition of the tokens (the base lexer)
--
--
chslexer :: CHSLexer
chslexer :: CHSLexer
chslexer  =      CHSLexer
pragma         -- LANGUAGE pragma
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
haskell        -- Haskell code
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested         -- nested comments
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl           -- control code (that has to be preserved)
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hook           -- start of a binding hook
            CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
cpp            -- a pre-processor directive (or `#c')

-- the LANGUAGE pragma
pragma :: CHSLexer
pragma :: CHSLexer
pragma = String -> CHSRegexp
forall s t. String -> Regexp s t
string "{-# LANGUAGE" CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \_ pos :: Position
pos s :: CHSLexerState
s ->
         (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragma Position
pos), Position -> Int -> Position
incPos Position
pos 12, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
langLexer)

langLexer :: CHSLexer
langLexer :: CHSLexer
langLexer = CHSLexer
whitespace CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
identOrKW CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
            (String -> CHSRegexp
forall s t. String -> Regexp s t
string "#-}" CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \_ pos :: Position
pos s :: CHSLexerState
s ->
            (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokPragEnd Position
pos), Position -> Int -> Position
incPos Position
pos 3, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer))

-- stream of Haskell code (terminated by a control character or binding hook)
--
haskell :: CHSLexer
--
-- NB: We need to make sure that '"' is not regarded as the beginning of a
--     string; however, we cannot really lex character literals properly
--     without lexing identifiers (as the latter may containing single quotes
--     as part of their lexeme).  Thus, we special case '"'.  This is still a
--     kludge, as a program fragment, such as
--
--       foo'"'strange string"
--
--     will not be handled correctly.
--
haskell :: CHSLexer
haskell  = (    CHSRegexp
forall s t. Regexp s t
anyButSpecialCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
specialButQuotes
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '"'  CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
inhstrCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '"'
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> CHSRegexp
forall s t. String -> Regexp s t
string "'\"'"                           -- special case of "
            CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> CHSRegexp
forall s t. String -> Regexp s t
string "--" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
anyButNLCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon   -- comment
           )
           CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatim
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '"'                                -- this is a bad kludge
                CHSRegexp -> ActionErr CHSToken -> CHSLexer
forall s t. Regexp s t -> ActionErr t -> Lexer s t
`lexactionErr` 
                  \_ pos :: Position
pos -> (Error -> Either Error CHSToken
forall a b. a -> Either a b
Left (Error -> Either Error CHSToken) -> Error -> Either Error CHSToken
forall a b. (a -> b) -> a -> b
$ ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                                              ["Lexical error!", 
                                              "Unclosed string."])
           where
             anyButSpecial :: Regexp s t
anyButSpecial    = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
inlineSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
specialSet)
             specialButQuotes :: Regexp s t
specialButQuotes = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
specialSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ ['"'])
             anyButNL :: Regexp s t
anyButNL         = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ ['\n'])
             inhstr :: Regexp s t
inhstr           = Regexp s t
forall s t. Regexp s t
instr Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> Regexp s t
forall s t. Char -> Regexp s t
char '\\' Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> Regexp s t
forall s t. String -> Regexp s t
string "\\\"" Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Regexp s t
forall s t. Regexp s t
gap
             gap :: Regexp s t
gap              = Char -> Regexp s t
forall s t. Char -> Regexp s t
char '\\' Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> String -> Regexp s t
forall s t. String -> Regexp s t
alt (' 'Char -> ShowS
forall a. a -> [a] -> [a]
:String
ctrlSet)Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`plus` Char -> Regexp s t
forall s t. Char -> Regexp s t
char '\\'

-- action copying the input verbatim to `CHSTokHaskell' tokens
--
copyVerbatim        :: CHSAction 
copyVerbatim :: Action CHSToken
copyVerbatim cs :: String
cs pos :: Position
pos  = CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (CHSToken -> Maybe CHSToken) -> CHSToken -> Maybe CHSToken
forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs

-- nested comments
--
nested :: CHSLexer
nested :: CHSLexer
nested  =
       String -> CHSRegexp
forall s t. String -> Regexp s t
string "{-"              {- for Haskell emacs mode :-( -}
       CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall a.
String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||<
       String -> CHSRegexp
forall s t. String -> Regexp s t
string "-}"
       CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
leaveComment
  where
    enterComment :: String
-> Position
-> CHSLexerState
-> (Maybe (Either a CHSToken), Position, CHSLexerState,
    Maybe CHSLexer)
enterComment cs :: String
cs pos :: Position
pos s :: CHSLexerState
s =
      (String -> Position -> Maybe (Either a CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,                    -- collect the lexeme
       Position -> Int -> Position
incPos Position
pos 2,                            -- advance current position
       CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1},             -- increase nesting level
       CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just (CHSLexer -> Maybe CHSLexer) -> CHSLexer -> Maybe CHSLexer
forall a b. (a -> b) -> a -> b
$ CHSLexer
inNestedComment)                  -- continue in comment lexer
    --
    leaveComment :: Meta CHSLexerState CHSToken
leaveComment cs :: String
cs pos :: Position
pos s :: CHSLexerState
s =
      case CHSLexerState -> Int
nestLvl CHSLexerState
s of
        0 -> (Position -> Maybe (Either Error CHSToken)
forall b. Position -> Maybe (Either Error b)
commentCloseErr Position
pos,              -- 0: -} outside comment => err
              Position -> Int -> Position
incPos Position
pos 2,                     -- advance current position
              CHSLexerState
s,
              Maybe CHSLexer
forall a. Maybe a
Nothing)
        1 -> (String -> Position -> Maybe (Either Error CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos 2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1},      -- decrease nesting level
              CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)                    -- 1: continue with root lexer
        _ -> (String -> Position -> Maybe (Either Error CHSToken)
forall a. String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' String
cs Position
pos,             -- collect the lexeme
              Position -> Int -> Position
incPos Position
pos 2,                     -- advance current position
              CHSLexerState
s {nestLvl :: Int
nestLvl = CHSLexerState -> Int
nestLvl CHSLexerState
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1},      -- decrease nesting level
              Maybe CHSLexer
forall a. Maybe a
Nothing)                          -- _: cont with comment lexer
    --
    copyVerbatim' :: String -> Position -> Maybe (Either a CHSToken)
copyVerbatim' cs :: String
cs pos :: Position
pos  = Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokHaskell Position
pos String
cs)
    --
    commentCloseErr :: Position -> Maybe (Either Error b)
commentCloseErr pos :: Position
pos =
      Either Error b -> Maybe (Either Error b)
forall a. a -> Maybe a
Just (Either Error b -> Maybe (Either Error b))
-> Either Error b -> Maybe (Either Error b)
forall a b. (a -> b) -> a -> b
$ Error -> Either Error b
forall a b. a -> Either a b
Left (ErrorLvl -> Position -> [String] -> Error
makeError ErrorLvl
ErrorErr Position
pos
                             ["Lexical error!", 
                             "`-}' not preceded by a matching `{-'."])
                             {- for Haskell emacs mode :-( -}


-- lexer processing the inner of a comment
--
inNestedComment :: CHSLexer
inNestedComment :: CHSLexer
inNestedComment  =      CHSLexer
commentInterior         -- inside a comment
                   CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
nested                  -- nested comments
                   CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                    -- control code (preserved)

-- standard characters in a nested comment
--
commentInterior :: CHSLexer
commentInterior :: CHSLexer
commentInterior  = (    CHSRegexp
forall s t. Regexp s t
anyButSpecialCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
                    CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
special
                   )
                   CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatim
                   where
                     anyButSpecial :: Regexp s t
anyButSpecial = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
inlineSet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
commentSpecialSet)
                     special :: Regexp s t
special       = String -> Regexp s t
forall s t. String -> Regexp s t
alt String
commentSpecialSet

-- control code in the base lexer (is turned into a token)
--
--  * this covers exactly the same set of characters as contained in `ctrlSet'
--   and `Lexers.ctrlLexer' and advances positions also like the `ctrlLexer'
--
ctrl :: CHSLexer
ctrl :: CHSLexer
ctrl  =     
       Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\n' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\r' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\v' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\f' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\t' CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` Meta CHSLexerState CHSToken
forall c a a.
String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab
  where
    newline :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
newline  [c :: Char
c] pos :: Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
retPos Position
pos)
    formfeed :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
formfeed [c :: Char
c] pos :: Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Int -> Position
incPos Position
pos 1)
    tab :: String
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
tab      [c :: Char
c] pos :: Position
pos = Position
-> Char
-> Position
-> c
-> (Maybe (Either a CHSToken), Position, c, Maybe a)
forall b c a a.
Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult Position
pos Char
c (Position -> Position
tabPos Position
pos)

    ctrlResult :: Position
-> Char -> b -> c -> (Maybe (Either a CHSToken), b, c, Maybe a)
ctrlResult pos :: Position
pos c :: Char
c pos' :: b
pos' s :: c
s = 
      (Either a CHSToken -> Maybe (Either a CHSToken)
forall a. a -> Maybe a
Just (Either a CHSToken -> Maybe (Either a CHSToken))
-> Either a CHSToken -> Maybe (Either a CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either a CHSToken
forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos Char
c), b
pos', c
s, Maybe a
forall a. Maybe a
Nothing)

-- start of a binding hook (ie, enter the binding hook lexer)
--
hook :: CHSLexer
hook :: CHSLexer
hook  = String -> CHSRegexp
forall s t. String -> Regexp s t
string "{#"
        CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \_ pos :: Position
pos s :: CHSLexerState
s -> (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Int -> Position
incPos Position
pos 2, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
bhLexer)

-- pre-processor directives and `#c'
--
--  * we lex `#c' as a directive and special case it in the action
--
--  * we lex C line number pragmas and special case it in the action
--
cpp :: CHSLexer
cpp :: CHSLexer
cpp = CHSLexer
directive
      where
        directive :: CHSLexer
directive = 
          String -> CHSRegexp
forall s t. String -> Regexp s t
string "\n#" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> String -> CHSRegexp
forall s t. String -> Regexp s t
alt ('\t'Char -> ShowS
forall a. a -> [a] -> [a]
:String
inlineSet)CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
          CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
             \(_:_:dir :: String
dir) pos :: Position
pos s :: CHSLexerState
s ->        -- strip off the "\n#"
               case String
dir of
                 ['c']                      ->          -- #c
                   (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 -- a #c may be followed by whitespace
                 'c':sp :: Char
sp:_ | Char
sp Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` " \t" ->          -- #c
                   (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
cLexer)
                 ' ':line :: String
line@(n :: Char
n:_) | Char -> Bool
isDigit Char
n ->                 -- C line pragma
                   let pos' :: Position
pos' = String -> Position -> Position
adjustPosByCLinePragma String
line Position
pos
                    in (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokLine Position
pos'), Position
pos', CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)
                 _                            ->        -- CPP directive
                   (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> String -> CHSToken
CHSTokCPP Position
pos String
dir), 
                    Position -> Position
retPos Position
pos, CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)

adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma :: String -> Position -> Position
adjustPosByCLinePragma str :: String
str (Position fname :: String
fname _ _) = 
  (String -> Int -> Int -> Position
Position String
fname' Int
row' 0)
  where
    str' :: String
str'            = ShowS
dropWhite String
str
    (rowStr :: String
rowStr, str'' :: String
str'') = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
str'
    row' :: Int
row'            = String -> Int
forall a. Read a => String -> a
read String
rowStr
    str''' :: String
str'''          = ShowS
dropWhite String
str''
    fnameStr :: String
fnameStr        = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '"') ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Int -> [a] -> [a]
drop 1 ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
str'''
    fname' :: String
fname'          | String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
str''' Bool -> Bool -> Bool
|| String -> Char
forall a. [a] -> a
head String
str''' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '"' = String
fname
                    -- try and get more sharing of file name strings
                    | String
fnameStr String -> String -> Bool
forall a. Eq a => a -> a -> Bool
== String
fname                 = String
fname
                    | Bool
otherwise                         = String
fnameStr
    --
    dropWhite :: ShowS
dropWhite = (Char -> Bool) -> ShowS
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (\c :: Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ' ' Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\t')

-- the binding hook lexer
--
bhLexer :: CHSLexer
bhLexer :: CHSLexer
bhLexer  =      CHSLexer
identOrKW
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
symbol
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
strlit
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
hsverb
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
whitespace
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
endOfHook
           CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> CHSRegexp
forall s t. String -> Regexp s t
string "--" CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
anyButNLCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\n'   -- comment
                CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` \_ pos :: Position
pos s :: CHSLexerState
s -> (Maybe (Either Error CHSToken)
forall a. Maybe a
Nothing, Position -> Position
retPos Position
pos, CHSLexerState
s, Maybe CHSLexer
forall a. Maybe a
Nothing)
           where
             anyButNL :: Regexp s t
anyButNL  = String -> Regexp s t
forall s t. String -> Regexp s t
alt (String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ ['\n'])
             endOfHook :: CHSLexer
endOfHook = String -> CHSRegexp
forall s t. String -> Regexp s t
string "#}"
                         CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta` 
                          \_ pos :: Position
pos s :: CHSLexerState
s -> (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> CHSToken
CHSTokEndHook Position
pos), 
                                       Position -> Int -> Position
incPos Position
pos 2, CHSLexerState
s, CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)

-- the inline-C lexer
--
cLexer :: CHSLexer
cLexer :: CHSLexer
cLexer =      CHSLexer
forall s. Lexer s CHSToken
inlineC                     -- inline C code
         CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
ctrl                        -- control code (preserved)
         CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> CHSRegexp
forall s t. String -> Regexp s t
string "\n#endc"            -- end of inline C code...
              CHSRegexp -> Meta CHSLexerState CHSToken -> CHSLexer
forall s t. Regexp s t -> Meta s t -> Lexer s t
`lexmeta`                   -- ...preserve '\n' as control token
              \_ pos :: Position
pos s :: CHSLexerState
s -> (Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a. a -> Maybe a
Just (Either Error CHSToken -> Maybe (Either Error CHSToken))
-> Either Error CHSToken -> Maybe (Either Error CHSToken)
forall a b. (a -> b) -> a -> b
$ CHSToken -> Either Error CHSToken
forall a b. b -> Either a b
Right (Position -> Char -> CHSToken
CHSTokCtrl Position
pos '\n'), Position -> Position
retPos Position
pos, CHSLexerState
s, 
                           CHSLexer -> Maybe CHSLexer
forall a. a -> Maybe a
Just CHSLexer
chslexer)
         where
           inlineC :: Lexer s CHSToken
inlineC = String -> Regexp s CHSToken
forall s t. String -> Regexp s t
alt String
inlineSet Regexp s CHSToken -> Action CHSToken -> Lexer s CHSToken
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` Action CHSToken
copyVerbatimC
           --
           copyVerbatimC :: CHSAction 
           copyVerbatimC :: Action CHSToken
copyVerbatimC cs :: String
cs pos :: Position
pos = CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (CHSToken -> Maybe CHSToken) -> CHSToken -> Maybe CHSToken
forall a b. (a -> b) -> a -> b
$ Position -> String -> CHSToken
CHSTokC Position
pos String
cs

-- whitespace
--
--  * horizontal and vertical tabs, newlines, and form feeds are filter out by
--   `Lexers.ctrlLexer' 
--
whitespace :: CHSLexer
whitespace :: CHSLexer
whitespace  =      (Char -> CHSRegexp
forall s t. Char -> Regexp s t
char ' ' CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \_ _ -> Maybe CHSToken
forall a. Maybe a
Nothing)
              CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< CHSLexer
forall s t. Lexer s t
ctrlLexer

-- identifiers and keywords
--
identOrKW :: CHSLexer
--
-- the strictness annotations seem to help a bit
--
identOrKW :: CHSLexer
identOrKW  = 
       -- identifier or keyword
       (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
digit CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\'')CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` CHSRegexp
forall s t. Regexp s t
epsilon
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \cs :: String
cs pos :: Position
pos name :: Name
name -> (Position -> String -> Name -> CHSToken
idkwtok (Position -> String -> Name -> CHSToken)
-> Position -> String -> Name -> CHSToken
forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
  CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< -- identifier in single quotes
       (Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\'' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
letter CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< CHSRegexp
forall s t. Regexp s t
digit)CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\''
       CHSRegexp -> (String -> Position -> Name -> CHSToken) -> CHSLexer
`lexactionName` \cs :: String
cs pos :: Position
pos name :: Name
name -> (Position -> String -> Name -> CHSToken
mkid (Position -> String -> Name -> CHSToken)
-> Position -> String -> Name -> CHSToken
forall a b. (a -> b) -> a -> b
$!Position
pos) String
cs Name
name)
       -- NB: quotes are removed by lexemeToIdent
  where
    idkwtok :: Position -> String -> Name -> CHSToken
idkwtok pos :: Position
pos "as"               _    = Position -> CHSToken
CHSTokAs      Position
pos
    idkwtok pos :: Position
pos "call"             _    = Position -> CHSToken
CHSTokCall    Position
pos
    idkwtok pos :: Position
pos "class"            _    = Position -> CHSToken
CHSTokClass   Position
pos
    idkwtok pos :: Position
pos "context"          _    = Position -> CHSToken
CHSTokContext Position
pos
    idkwtok pos :: Position
pos "deriving"         _    = Position -> CHSToken
CHSTokDerive  Position
pos
    idkwtok pos :: Position
pos "enum"             _    = Position -> CHSToken
CHSTokEnum    Position
pos
    idkwtok pos :: Position
pos "foreign"          _    = Position -> CHSToken
CHSTokForeign Position
pos
    idkwtok pos :: Position
pos "fun"              _    = Position -> CHSToken
CHSTokFun     Position
pos
    idkwtok pos :: Position
pos "get"              _    = Position -> CHSToken
CHSTokGet     Position
pos
    idkwtok pos :: Position
pos "import"           _    = Position -> CHSToken
CHSTokImport  Position
pos
    idkwtok pos :: Position
pos "lib"              _    = Position -> CHSToken
CHSTokLib     Position
pos
    idkwtok pos :: Position
pos "newtype"          _    = Position -> CHSToken
CHSTokNewtype Position
pos
    idkwtok pos :: Position
pos "pointer"          _    = Position -> CHSToken
CHSTokPointer Position
pos
    idkwtok pos :: Position
pos "prefix"           _    = Position -> CHSToken
CHSTokPrefix  Position
pos
    idkwtok pos :: Position
pos "pure"             _    = Position -> CHSToken
CHSTokPure    Position
pos
    idkwtok pos :: Position
pos "qualified"        _    = Position -> CHSToken
CHSTokQualif  Position
pos
    idkwtok pos :: Position
pos "set"              _    = Position -> CHSToken
CHSTokSet     Position
pos
    idkwtok pos :: Position
pos "sizeof"           _    = Position -> CHSToken
CHSTokSizeof  Position
pos
    idkwtok pos :: Position
pos "stable"           _    = Position -> CHSToken
CHSTokStable  Position
pos
    idkwtok pos :: Position
pos "type"             _    = Position -> CHSToken
CHSTokType    Position
pos
    idkwtok pos :: Position
pos "underscoreToCase" _    = Position -> CHSToken
CHSTok_2Case  Position
pos
    idkwtok pos :: Position
pos "unsafe"           _    = Position -> CHSToken
CHSTokUnsafe  Position
pos
    idkwtok pos :: Position
pos "with"             _    = Position -> CHSToken
CHSTokWith    Position
pos
    idkwtok pos :: Position
pos "lock"             _    = Position -> CHSToken
CHSTokLock    Position
pos
    idkwtok pos :: Position
pos "nolock"           _    = Position -> CHSToken
CHSTokNolock  Position
pos
    idkwtok pos :: Position
pos cs :: String
cs                 name :: Name
name = Position -> String -> Name -> CHSToken
mkid Position
pos String
cs Name
name
    --
    mkid :: Position -> String -> Name -> CHSToken
mkid pos :: Position
pos cs :: String
cs name :: Name
name = Position -> Ident -> CHSToken
CHSTokIdent Position
pos (Position -> String -> Name -> Ident
lexemeToIdent Position
pos String
cs Name
name)

-- reserved symbols
--
symbol :: CHSLexer
symbol :: CHSLexer
symbol  =      String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "->" Position -> CHSToken
CHSTokArrow
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "=>" Position -> CHSToken
CHSTokDArrow
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "."  Position -> CHSToken
CHSTokDot
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym ","  Position -> CHSToken
CHSTokComma
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "="  Position -> CHSToken
CHSTokEqual
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "-"  Position -> CHSToken
CHSTokMinus
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "*"  Position -> CHSToken
CHSTokStar
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "&"  Position -> CHSToken
CHSTokAmp
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "^"  Position -> CHSToken
CHSTokHat
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "{"  Position -> CHSToken
CHSTokLBrace
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "}"  Position -> CHSToken
CHSTokRBrace
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym "("  Position -> CHSToken
CHSTokLParen
          CHSLexer -> CHSRegexp
forall s t. Lexer s t -> Lexer s t -> Lexer s t
>||< String -> (Position -> CHSToken) -> CHSLexer
forall t s. String -> (Position -> t) -> Lexer s t
sym ")"  Position -> CHSToken
CHSTokRParen
          where
            sym :: String -> (Position -> t) -> Lexer s t
sym cs :: String
cs con :: Position -> t
con = String -> Regexp s t
forall s t. String -> Regexp s t
string String
cs Regexp s t -> Action t -> Lexer s t
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \_ pos :: Position
pos -> t -> Maybe t
forall a. a -> Maybe a
Just (Position -> t
con Position
pos)

-- string
--
strlit :: CHSLexer
strlit :: CHSLexer
strlit  = Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '"' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> (CHSRegexp
forall s t. Regexp s t
instr CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\\')CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '"'
          CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \cs :: String
cs pos :: Position
pos -> CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokString Position
pos (ShowS
forall a. [a] -> [a]
init ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
tail ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
cs))

-- verbatim code
--
hsverb :: CHSLexer
hsverb :: CHSLexer
hsverb  = Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '`' CHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
+> CHSRegexp
forall s t. Regexp s t
inhsverbCHSRegexp -> CHSRegexp -> CHSRegexp
forall s t. Regexp s t -> Regexp s t -> Regexp s t
`star` Char -> CHSRegexp
forall s t. Char -> Regexp s t
char '\''
          CHSRegexp -> Action CHSToken -> CHSLexer
forall s t. Regexp s t -> Action t -> Lexer s t
`lexaction` \cs :: String
cs pos :: Position
pos -> CHSToken -> Maybe CHSToken
forall a. a -> Maybe a
Just (Position -> String -> CHSToken
CHSTokHSVerb Position
pos (ShowS
forall a. [a] -> [a]
init ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
forall a. [a] -> [a]
tail ShowS -> ShowS
forall a b. (a -> b) -> a -> b
$ String
cs))


-- regular expressions
--
letter, digit, instr, inchar, inhsverb :: Regexp s t
letter :: Regexp s t
letter   = String -> Regexp s t
forall s t. String -> Regexp s t
alt ['a'..'z'] Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< String -> Regexp s t
forall s t. String -> Regexp s t
alt ['A'..'Z'] Regexp s t -> Regexp s t -> Regexp s t
forall s t. Regexp s t -> Regexp s t -> Regexp s t
>|< Char -> Regexp s t
forall s t. Char -> Regexp s t
char '_'
digit :: Regexp s t
digit    = String -> Regexp s t
forall s t. String -> Regexp s t
alt ['0'..'9']
instr :: Regexp s t
instr    = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([' '..'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ "\"\\")
inchar :: Regexp s t
inchar   = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([' '..'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ "\'")
inhsverb :: Regexp s t
inhsverb = String -> Regexp s t
forall s t. String -> Regexp s t
alt ([' '..'\127'] String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ "\'")

-- character sets
--
anySet, inlineSet, specialSet, commentSpecialSet, ctrlSet :: [Char]
anySet :: String
anySet            = ['\0'..'\255']
inlineSet :: String
inlineSet         = String
anySet String -> ShowS
forall a. Eq a => [a] -> [a] -> [a]
\\ String
ctrlSet
specialSet :: String
specialSet        = ['{', '-', '"', '\'']
commentSpecialSet :: String
commentSpecialSet = ['{', '-']
ctrlSet :: String
ctrlSet           = ['\n', '\f', '\r', '\t', '\v']


-- main lexing routine
-- -------------------

-- generate a token sequence out of a string denoting a CHS file
-- (EXPORTED) 
--
--  * the given position is attributed to the first character in the string
--
--  * errors are entered into the compiler state
--
lexCHS        :: String -> Position -> CST s [CHSToken]
lexCHS :: String -> Position -> CST s [CHSToken]
lexCHS cs :: String
cs pos :: Position
pos  = 
  do
    CHSLexerState
state <- CST s CHSLexerState
forall s. CST s CHSLexerState
initialState
    let (ts :: [CHSToken]
ts, lstate :: LexerState CHSLexerState
lstate, errs :: [Error]
errs) = CHSLexer
-> LexerState CHSLexerState
-> ([CHSToken], LexerState CHSLexerState, [Error])
forall s t.
Lexer s t -> LexerState s -> ([t], LexerState s, [Error])
execLexer CHSLexer
chslexer (String
cs, Position
pos, CHSLexerState
state)
        (_, pos' :: Position
pos', state' :: CHSLexerState
state')  = LexerState CHSLexerState
lstate
    (Error -> PreCST SwitchBoard s ())
-> [Error] -> PreCST SwitchBoard s [()]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Error -> PreCST SwitchBoard s ()
forall e s. Error -> PreCST e s ()
raise [Error]
errs
    Position -> CHSLexerState -> PreCST SwitchBoard s ()
forall s. Position -> CHSLexerState -> CST s ()
assertFinalState Position
pos' CHSLexerState
state'
    [CHSToken] -> CST s [CHSToken]
forall (m :: * -> *) a. Monad m => a -> m a
return [CHSToken]
ts