. when checking for existence,
the word (is) could be used in 2 ways:
(is x) checks for existence,
(x is t) checks for x's type = t .
. another idea is having support for
english-style postfix operators:
then you could write (x exists)
and you'd parse this like an infix
except you're not looking for a 2nd arg .
. its syntax for being declared
could be similar to that of other functions:
myPrefix(arg.argType).ReturnType,
(arg.t)myPostfix.ReturnType,
myInfix(arg1,arg2:t).ReturnType .
-- anything with 2 args is an infix operator .
the parser's algorithm:
. subexpressions are composed of atomsthat are the leaf in trees branched by
pre-, post- and infix operators;
in addition to the operators
branching is formed by enclosures;
eg, when we find any of:
( { [ <.
we call get-subexpression,
which considers its terminators to be:
, ; . : ) } ] .>
(depending on context and spacing);
and, anything in between these terminators
will be a subexpression .
find a term:
. a term is whatever results in a linear list,as opposed to a branching tree .
. we might find a string of prefix operators,
we eventually find an atom,
and then we might find a string of postfixes .
. so to start,
we're expecting either an atom or a prefix:
. when we finally find an atom,
we can call [find end of term].
[find end of term]:
. after getting the next token (that is,a symbol within the text we are parsing)
there are several cases:
# terminator:
. we found a complete subexpression .
# postfix:
. a postfix is treed just like an infix
because it's simpler for robots to read
(we invented the idea of a postfix
just for humans to see things their way,
but humans won't see our parse tree).
. to position our postix in the list,
it is supposed to be placed at
the head of the current term's list;
if there is more than one prefix operator,
it might not apply to the entire list,
so we have to place it with type matching,
starting at the top and working down
until finding that matching type
(that's the way english intuitively reads it:
it's placed at the end because
it applies to everything before it;
that's english's way of
avoiding long parentheticals).
. this might be the end of the term;
or, there could be more postfix;
so recurse: call [find end of term].
# infix:
. we have found end of term;
so we call [build a biop tree](term)
-- the term we just found
is made to be the binary operator's first arg,
and will now find the 2nd arg,
which is a subexpression
(that might either be a term,
or another biop subtree ).
tree a binary operator
. the algorithm for [build a biop tree](term)is already designed elsewhere .
[3.21:
. actually, that still needs work:
the 1988 version is included below;
and, you can see it uses precedence ordering,
but in our current idea for treebiop
we have the same problem as for postfix placement:
we need to find the matching type;
eg, let b.truth, and let x,y,z,t: num;
then parse ( b or x + y * z = t );
when we get to the x,
there's a type mismatch -- truth OR number --
so we tree-in a place-holder:
(b or { (x ...) })
then for the (x+y*z) we need a precedence table
to tell us that prec(*) < prec(+)
and from that we get ( b or {(x+(y*z)) ...} )
then "=" is taking numbers and returning truth;
so, we parse it as being above num
and also being below truth:
(b or {(x+y*z) = (...)})
and finally (t), a number,
matches the (number = ...):
( b or {(x+y*z) = (t)} ) .]
[ the following is Vax Pascal language:
(I'm not sure if this code works,
but a possibly later version of it
did pass some tests .]
{ disk sys2: parse }{[ENVIRONMENT]}
[INHERIT( 'TEXT.PEN')]
MODULE parser( parsed, input, output );
CONST
letters = ['a'..'z','A'..'Z','_'];
base10 = ['0'..'9'];
TYPE
N= 0..maxint; Z= integer;
V= VARYING[ 31 ] OF char;
express =^parsing;
parsing =RECORD
CASE node: parse_type OF
operand:(
CASE symbolic: boolean OF
true:(symbol:V);
false:(value:N) end;
operator:(
operation:V
inverse: boolean;
unary, binary: express )
end END;
lexel_ID =( error, number, name, left_par, sign, post_fix, comma, right_par, binary );
precidence_ord = 0..5;
VAR parsed:text;
FUNCTION fun( meaning:lexel_ID, control, lexel:V ): lexel_ID;
BEGIN fun:= meaning; pro(meaning,control,lexel)
END;
PROCEDURE pro( meaning:lexel_ID, control, lexel:V );
BEGIN write( 'CONTROL: ', control, ' DATA: ', lexel );
writeV(lexel,meaning); writeln( ' ID: ', lexel )
END
PROCEDURE get_beyond_SP;
BEGIN while (parsed^ in[''(32),''(9)]) and not eof(parsed) do get(parsed)
END;
FUNCTION Cap( c:char ):char; CONST a=97; z=122; VAR x:N;
BEGIN x:= ord(c); IF (a <=x)and(x<= z) THEN cap:= chr( x-32 ) ELSE Cap:= c
END;
PROCEDURE capV( var T:V ); VAR i:N;
BEGIN for i:= 1 to T.LENGTH do T.BODY[i]:= cap(T.BODY[i])
END;
PROCEDURE RDcat( var str:V );
BEGIN str:= str +parsed^; get(parsed)
END;
PROCEDURE get_name( var lexel:V );
BEGIN lexel:=''; while (parsed^ in(letters +base10)) and not eof(parsed) do RDcat( lexel ); get_beyond_SP
END;
PROCEDURE get_N( var lexel:V );
BEGIN lexel:=''; while (parsed^ in(['_']+base10)) and not eof(parsed) do RDcat( lexel ); get_beyond_SP
END;
FUNCTION new_unary( lexel:V; operand: express ): express;
VAR node: express;
{note --inverse of operand is the
'multiplicative inverse: (1/operand),
since this what makes the graph flip
on its identity axis in the graph of m*x }
BEGIN new( node, operation );
WITH node do begin
unary:= operand;
binary:= NIL;
inverse:= false; operation:= lexel;
END
FUNCTION new_binary( lexel:V; and1, and2: express ): express;
VAR node: express;
BEGIN new( node, operation );
WITH node do begin
unary:= and1;
binary:= and2;
inverse:= false; operation:= lexel
end END;
FUNCTION new_value( number:N ): express; VAR node: express;
BEGIN new( node, operand, false );
node^.value:= number
END;
FUNCTION new_symbol( lexel:V ): express; VAR node: express;
BEGIN new( node, operand, true );
node^.symbol:= lexel
END;
FUNCTION p( Q:express ):precidence_ord; VAR T:V;
BEGIN if not( Q^.node=operator ) then
writeln( 'p(rec variant)' )
else
BEGIN T:=Q^.operation; capV( T ); pro( binary, 'p', T );
IF (T='**')or (T='//') THEN
p:=1
ELSE IF (T='*')or(T='/')or(T='DIV')or(T='MOD')or(T='REM')THEN
p:=2
ELSE IF (T='+')or(T='-') THEN
p:=3
ELSE IF (T= '=')or(T='>=')or(T='<=')or (T= '/=')or(T='IN')THEN
p:=4
ELSE IF (T='AND')or(T='OR')or (T='XOR')or (T='<=>')or(T= '==>')THEN
p:=5
ELSE p:=0
END END;
FUNCTION ID_next( var L:V ):lexel_ID; VAR I:lexel_ID; T:V;
PROCEDURE getI( ID:lexel_ID ); BEGIN I:= ID; get(parsed) END;
BEGIN
get_beyond_SP; L:=''; I:= error;
CASE parsed^ OF
'(': getI( left_par );
')': getI( right_par );
',': getI( comma );
'a'..'z','A'..'Z':begin get_name(L); T:=Cap(L);
IF (T='AND')or(T='OR')or(T='XOR')or(T='DIV')or(T='MOD')or(T='REM')or(T='IN') THEN
I:= binary
ELSE IF parsed^='(' THEN begin
I:= post_fix; get(parsed) end
ELSE
I:= name
end;
'+','-': begin I:= sign; RDcat(L) end;
'*': begin I:= binary; RDcat(L);
IF parsed^='*' THEN RDcat( L ) end;
'/': begin I:= binary; RDcat(L);
IF (parsed^= '/')or(parsed^= '=') THEN RDcat( L ) end;
'=': begin RDcat(L);
IF parsed^ = '=' THEN begin RDcat( L );
IF parsed^= '>' THEN begin RDcat( L ); I:= binary end
ELSE I:= binary end;
'>':begin I:= binary; RDcat(L); IF parsed^ = '=' THEN RDcat( L ) end;
'<':begin I:= binary; RDcat(L);
IF parsed^ = '=' THEN begin
RDcat( L )
IF parsed^='>' THEN
RDcat( L )
end end
end;
ID_next:= I
END;
PROCEDURE Treebiop( var root:express; leaf: express); FORWARD;
FUNCTION get_operand: express; FORWARD;
FUNCTION subexpress: express;
{--this returns to caller when a right_par
or a comma, or eoln(parsed) is found,
thus its tree is an expressions spanning
,-) or (-) or ,-, or (-,
or from beginning to end of file
since it is the initiating procedure.
The first thing it identifies will not be a biop,
but everything else.
The second thing it identifies
must be a biop or an exit prompt. }
VAR lexel:V; SE:express;
BEGIN
SE:= get_operand;
IF SE = NIL THEN
SubExpression:= NIL
ELSE BEGIN
CASE fun( ID_next( lexel ), 'subexpress', lexel) OF
error, comma, right_par:
SE:= SubExpression;
binary: begin
SE:= new_binary( lexel,
SE,
NIL );
Treebiop( SE,SE ) end;
left_par, number, name, sign, post_fix:
writeln( 'subexpress logic')
end
END;
FUNCTION get_parameter_list: express;
VAR delimiter: express;
BEGIN
new( delimiter, operator );
WITH delimiter do begin
unary:= subexpress;
binary:= get_operand;
inverse:= false; operation:= '' end;
get_parameter_list:= delimiter
END;
FUNCTION get_number; express;
{--finds N.NE-N and parses into
operations on integers }
VAR node:express; L:V; int,frac,exp:N;
BEGIN
get_N(L); readV( L, int );
node:= new_value( int );
IF cap(parsed^) = '.' THEN begin
get(parsed); get_N(L); readV(L,frac);
node:= new_binary( '+',
node,
new_binary( '/'
new_value( frac ),
new_value( 10**(L.LENGTH) )
)) end;
IF cap(parsed^) = 'E' THEN begin
get(parsed); get_N(L); readV(L,exp);
node:= new_binary( '*',
node,
new_binary( '**'
new_value( 10 ),
new_value( exp )
)) end;
get_number:= node
END;
FUNCTION get_operand: express;
VAR lexel:V; GO:express;
BEGIN
CASE fun( ID_next(lexel), 'get_operand', lexel ) OF
left_par: GO:= subexpress;
name: GO:= new_symbol( lexel );
post_fix: GO:= new_unary( lexel, get_parameter_list );
binary, comma, error, right_par: GO:= NIL;
number: GO:= get_number;
sign: GO:=
new_unary( lexel, get_operand )
end get_operand:= GO
END;
PROCEDURE Treebiop( var root:express; leaf: express);
{ --Once a binary operator is
positioned with a unary operand,
this routine uses the following
binary operator's precidence ord
to correctly place it and the
operators' mutually adjacent operand
within the expression tree. }
VAR and2, op1: express;
root_elevated:boolean;
FUNCTION correct_precidence: boolean;
BEGIN correct_precidence:=
(p(op1)<=p(op2)) or (op1=leaf)
END;
BEGIN{Treebiop}
op1:= root; and2:= get_operand;
IF and2=NIL THEN
writeln(op1^.operation,'no and2');
ELSE IF not(fun( ID_next( lexel ),'treebiop', lexel ) = binary )THEN
leaf^.binary:= and2
ELSE BEGIN
pro( binary, 'treebiop', lexel );
root_elevated:= correct_precidence;
WHILE not correct_precidence do
op1:= op1^.binary;
IF p(op1)<=p(op2) THEN begin
{ op1 has precidence over op2,
so op2 is the new root, with op1 as an operand.}
op1^.binary:= and2;
op1:= new_binary( lexel,
op1;
NIL );
IF root_elevated THEN
root:= op1;
Treebiop(root,op1) end
ELSE begin
op1^.binary:= new_binary( lexel,
and2;
NIL );
leaf:= op1^.binary;
Treebiop(root,leaf) end
END;
END.
4.3: adda/syntax/multi-infix operator notation:
ReplyDelete. just as postfix can be defined
we can define multi-infix operators
and search for all possibilities to flag for ambiguity .