the parser relies on some “monadic” programming idioms
basically, parser combinator (But 非常麻烦 in Coq)
Lex
1
2
3
4
5
6
7
8
9
10
| Inductive chartype := white | alpha | digit | other.
Definition classifyChar (c : ascii) : chartype :=
if isWhite c then white
else if isAlpha c then alpha
else if isDigit c then digit
else other.
Definition token := string.
|
Syntax
带 error msg 的 option
:
1
2
3
4
5
6
| Inductive optionE (X:Type) : Type :=
| SomeE (x : X)
| NoneE (s : string). (** w/ error msg **)
Arguments SomeE {X}.
Arguments NoneE {X}.
|
Monadic:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
| Notation "' p <- e1 ;; e2"
:= (match e1 with
| SomeE p ⇒ e2
| NoneE err ⇒ NoneE err
end)
(right associativity, p pattern, at level 60, e1 at next level).
Notation "'TRY' ' p <- e1 ;; e2 'OR' e3"
:= (match e1 with
| SomeE p ⇒ e2
| NoneE _ ⇒ e3
end)
(right associativity, p pattern,
at level 60, e1 at next level, e2 at next level).
|
1
2
| Definition parser (T : Type) :=
list token → optionE (T * list token).
|
1
2
3
4
5
6
7
| newtype Parser a = Parser (String -> [(a,String)])
instance Monad Parser where
-- (>>=) :: Parser a -> (a -> Parser b) -> Parser b
p >>= f = P (\inp -> case parse p inp of
[] -> []
[(v,out)] -> parse (f v) out)
|
combinator many
Coq vs. Haskell
- explicit recursion depth, .e. step-indexed
- explicit exception
optionE
(in Haskell, it’s hidden behind the Parser
Monad as []
)
- explicit string state
xs
(in Haskell, it’s hidden behind the Parser
Monad as String -> String
)
- explicit
acc
epted token (in Haskell, it’s hidden behind the Parser
Monad as a
, argument)
1
2
3
4
5
6
7
8
9
10
11
12
| Fixpoint many_helper {T} (p : parser T) acc steps xs :=
match steps, p xs with
| 0, _ ⇒
NoneE "Too many recursive calls"
| _, NoneE _ ⇒
SomeE ((rev acc), xs)
| S steps', SomeE (t, xs') ⇒
many_helper p (t :: acc) steps' xs'
end.
Fixpoint many {T} (p : parser T) (steps : nat) : parser (list T) :=
many_helper p [] steps.
|
1
2
3
4
5
6
7
8
9
| manyL :: Parser a -> Parser [a]
manyL p = many1L p <++ return [] -- left biased OR
many1L :: Parser a -> Parser [a]
many1L p = (:) <$> p <*> manyL p
-- or
many1L p = do x <- p
xs <- manyL p
return (x : xs)
|
ident
1
2
3
4
5
6
7
| Definition parseIdentifier (xs : list token) : optionE (string * list token) :=
match xs with
| [] ⇒ NoneE "Expected identifier"
| x::xs' ⇒ if forallb isLowerAlpha (list_of_string x)
then SomeE (x, xs')
else NoneE ("Illegal identifier:'" ++ x ++ "'")
end.
|
1
2
3
4
| ident :: Parser String
ident = do x <- lower
xs <- many alphanum
return (x:xs)
|