hi static
first:
I think it is better and smarter to spend a few months trying everything than
to do what I did in my last two interpreters; there is really no need to rush and make
stupid mistakes... practice, yes, there will be.
What you present here is pseudo-code for a calculator (an expression evaluator),
but that is not enough for a solid interpreter... do you agree with that?
02-26-2018, 12:20 AM This post was last modified: 02-26-2018, 12:27 AM by STxAxTIC. Edited 0 times
Nope, quite the opposite: this is the most expressive toy on the block right now. If I presented the full code it would be more obvious. Here's what you want to also see I bet:
Code:
' SubExecute$: splits a script into ":"-separated statements, pre-processes
' do/loop, print_ and anchor_ markers, then evaluates the statements one by one.
' Output of statements carrying the "print_" prefix is accumulated in TheReturn.
'   TheScriptIn   - script text; in "file" mode, a wrapped file name instead.
'   TheModeIn     - "file" loads the script from disk before processing.
'   ScopeSwitchIn - "_yes" snapshots the function/variable lists on entry and
'                   restores them on exit.
' NOTE(review): this listing is abridged. The END IF matching
' IF (TheMode = "file"), the assignment of TheReturn to the function name and
' END FUNCTION are not visible here - confirm against the full source.
FUNCTION SubExecute$ (TheScriptIn AS STRING, TheModeIn AS STRING, ScopeSwitchIn AS STRING)
DIM TheReturn AS STRING
DIM TheScript AS STRING
DIM TheMode AS STRING
DIM ScopeSwitch AS STRING
DIM TheFile AS STRING
DIM AnchorCount AS INTEGER
DIM LineCount AS INTEGER
DIM LoopCounter AS INTEGER
DIM LoopIndex AS INTEGER
DIM LineIndex AS INTEGER
DIM k AS INTEGER
DIM m AS INTEGER
DIM c AS STRING
DIM d AS STRING
DIM EvalDone AS INTEGER
DIM FunctionListSizeOrig AS INTEGER
DIM VariableListSizeOrig AS INTEGER
' Work on local copies of the arguments and start from a clean state.
TheReturn = ""
TheScript = TheScriptIn
TheMode = TheModeIn
ScopeSwitch = ScopeSwitchIn
AnchorCount = 0
LineCount = 0
LoopCounter = 0
LoopIndex = 0
LineIndex = 0
EvalDone = 0
IF (ScopeSwitch = "_yes") THEN
' Increment function and variable scope.
' The current list sizes are remembered so they can be reset on exit, and both
' columns of every list entry are copied into the new scope level.
FunctionListSizeOrig = FunctionListSize
VariableListSizeOrig = VariableListSize
ScopeLevel = ScopeLevel + 1
FOR k = 1 TO FunctionListSize
FunctionListScope(k, 1, ScopeLevel) = FunctionList(k, 1)
FunctionListScope(k, 2, ScopeLevel) = FunctionList(k, 2)
NEXT
FOR k = 1 TO VariableListSize
VariableListScope(k, 1, ScopeLevel) = VariableList(k, 1)
VariableListScope(k, 2, ScopeLevel) = VariableList(k, 2)
NEXT
END IF
IF (TheMode = "file") THEN
' In file mode TheScript carries the file name; RemoveWrapping$ is defined
' elsewhere (presumably it strips the surrounding ` and ' quote characters).
TheFile = RemoveWrapping$(TheScript, "`'")
TheScript = ""
' CPP: STARTSKIP
' JavaScript: STARTSKIP
OPEN TheFile FOR INPUT AS #1
' FileStatus is set outside this routine (presumably by an error handler when
' OPEN fails) - verify against the full source.
IF (FileStatus <> "ERROR") THEN
' Lines are trimmed and concatenated with no separator added between them,
' so the file itself must contain the ":" statement separators.
DO WHILE NOT EOF(1)
LINE INPUT #1, d
d = LTRIM$(RTRIM$(d))
TheScript = TheScript + d
LOOP
ELSE
' Replace the script with a single statement that prints the error.
TheScript = "print_`ERROR: File not found.'"
FileStatus = ""
END IF
CLOSE #1
' CPP: ENDSKIP
' JavaScript: ENDSKIP
' Scale arrays according to line count.
' Offsets of +1 added to accomodate C++ implementation.
LineCount = CountElements("(" + TheScript + ")", ":")
DIM ScriptLine(LineCount + 1) AS STRING
DIM ScriptLinePrintFlag(LineCount + 1) AS INTEGER
DIM LoopStartPosition(LineCount + 1) AS INTEGER
DIM LoopIterationCount(LineCount + 1) AS INTEGER
DIM AnchorList(LineCount + 1, 2) AS STRING
' CPP: STARTSKIP
'\\for (k = 1; k <= 299; k += 1) {
'\\ AnchorList[k] = new Array(2);
'\\}
' CPP: ENDSKIP
' Break input string apart at each (:) symbol.
' Each pass peels the first statement off the front of TheScript; when no
' further separator is reported the remainder becomes the last statement.
m = 0
DO WHILE (1 = 1)
k = GetSeparatorPos("(" + TheScript + ")", ":", 1, 1) - 1
IF (k > -1) THEN
m = m + 1
ScriptLine(m) = LEFT$(TheScript, k - 1)
TheScript = RIGHT$(TheScript, LEN(TheScript) - k)
ELSE
m = m + 1
ScriptLine(m) = TheScript
EXIT DO
END IF
LOOP
' Prepare loops.
' Scanning from the bottom, the last still-unmarked "do" is tagged "_do <n>"
' and the first "loop" at or after it is tagged "_loop <n>", so inner pairs are
' matched before outer ones. The outer DO repeats the scan until the FOR runs
' to completion (leaving k at 0) or a "do" is found on line 1.
k = LineCount
DO WHILE (k > 1)
FOR k = LineCount TO 1 STEP -1
IF (ScriptLine(k) = "do") THEN
LoopCounter = LoopCounter + 1
ScriptLine(k) = "_do " + LTRIM$(STR$(LoopCounter))
LoopStartPosition(LoopCounter) = k
FOR m = k TO LineCount
IF (ScriptLine(m) = "loop") THEN
ScriptLine(m) = "_loop " + LTRIM$(STR$(LoopCounter))
EXIT FOR
END IF
NEXT
EXIT FOR
END IF
NEXT
LOOP
' Prepare `print' and `anchor'.
' "print_" is stripped from the stored line and remembered as a flag;
' "anchor_" lines stay in place and their name and line number are recorded.
FOR k = 1 TO LineCount
c = ScriptLine(k)
ScriptLinePrintFlag(k) = 0
IF (LEFT$(c, 6) = "print_") THEN
ScriptLinePrintFlag(k) = 1
ScriptLine(k) = RIGHT$(c, LEN(c) - 6)
END IF
IF (LEFT$(c, 7) = "anchor_") THEN
AnchorCount = AnchorCount + 1
AnchorList(AnchorCount, 1) = RIGHT$(c, LEN(c) - 7)
AnchorList(AnchorCount, 2) = LTRIM$(STR$(k))
END IF
NEXT
' Main script evaluation loop.
' LineIndex is advanced at the top of each pass, so any branch that assigns
' LineIndex resumes execution on the line AFTER the one it names.
DO WHILE (LineIndex < LineCount)
LineIndex = LineIndex + 1
c = ScriptLine(LineIndex)
EvalDone = 0
IF (LEFT$(c, 3) = "_do") THEN
EvalDone = 1
LoopIndex = VAL(RIGHT$(c, LEN(c) - 3))
' The iteration count comes from re-evaluating the statement just before the
' "do". NOTE(review): a "do" on line 1 would read ScriptLine(0) here.
LoopIterationCount(LoopIndex) = VAL(InternalEval$(ScriptLine(LineIndex - 1)))
END IF
IF (LEFT$(c, 5) = "_loop") THEN
EvalDone = 1
LoopIndex = VAL(RIGHT$(c, LEN(c) - 5))
' Jump back to the matching "_do" line while iterations remain.
IF (LoopIterationCount(LoopIndex) > 1) THEN
LoopIterationCount(LoopIndex) = LoopIterationCount(LoopIndex) - 1
LineIndex = LoopStartPosition(LoopIndex)
END IF
END IF
IF (LEFT$(c, 5) = "goto_") THEN
EvalDone = 1
' No EXIT FOR here: if several anchors share a name, the last one wins.
FOR k = 1 TO AnchorCount
IF (AnchorList(k, 1) = RIGHT$(c, LEN(c) - 5)) THEN
LineIndex = VAL(AnchorList(k, 2))
END IF
NEXT
END IF
IF (LEFT$(c, 3) = "if_") THEN
EvalDone = 1
' Form is if_<condition>@<anchor>: the text before "@" is evaluated and the
' jump to the named anchor is taken only when the result is 1.
c = RIGHT$(c, LEN(c) - 3)
k = ScanForOperator(c, "@")
d = RIGHT$(c, LEN(c) - k)
c = LEFT$(c, k - 1)
c = InternalEval$(c)
IF (VAL(c) = 1) THEN
FOR k = 1 TO AnchorCount
IF (AnchorList(k, 1) = d) THEN
LineIndex = VAL(AnchorList(k, 2))
EXIT FOR
END IF
NEXT
END IF
END IF
' Anchor lines are markers only. Note that c may already hold the result of
' the if_ condition at this point rather than the original statement text.
IF (LEFT$(c, 7) = "anchor_") THEN
EvalDone = 1
END IF
' Anything not handled above is an ordinary statement: evaluate it and keep
' the result only if the line was flagged with "print_".
IF (EvalDone = 0) THEN
EvalDone = 1
c = InternalEval$(c)
IF (ScriptLinePrintFlag(LineIndex) = 1) THEN
TheReturn = TheReturn + c
END IF
END IF
LOOP
IF (ScopeSwitch = "_yes") THEN
' Decrement function and variable scope.
' Entries are copied back from the snapshot before the sizes are reset, so the
' loops still run over the current (possibly grown) list sizes.
FOR k = 1 TO FunctionListSize
FunctionList(k, 1) = FunctionListScope(k, 1, ScopeLevel)
FunctionList(k, 2) = FunctionListScope(k, 2, ScopeLevel)
NEXT
FOR k = 1 TO VariableListSize
VariableList(k, 1) = VariableListScope(k, 1, ScopeLevel)
VariableList(k, 2) = VariableListScope(k, 2, ScopeLevel)
NEXT
ScopeLevel = ScopeLevel - 1
FunctionListSize = FunctionListSizeOrig
VariableListSize = VariableListSizeOrig
END IF
02-26-2018, 01:48 AM This post was last modified: 02-26-2018, 01:48 AM by Pete. Edited 0 times
So you guys have a choice in front of you. You can either take Bill's advice, and try out your own methods, or you can ignore his advice and read his method. Now that's a real head scratcher!
Personally, I'm a run and gun enthusiast, too. I also love refining things after the fact, but when it comes to coding large projects, truth be told this method takes a lot of time, but it's also full of discovery and fulfillment. Too much research and reading other people's work and you wake up one morning feeling more like a stenographer than a coder.
02-26-2018, 02:01 AM This post was last modified: 02-26-2018, 02:30 AM by bplus. Edited 0 times
(02-25-2018, 07:35 PM)Aurel Wrote: dear lord ..this forum have a such a stupid problems that i cannot
believe ..that stupid text editor not show properly my text ..damn!
i see that i must post image instead of text
Hi Aurel,
Try putting spaced out typing into a code block in the editor, that usually preserves the spaces you had so painstakingly put in and the forum editor laughs at your efforts and makes a terrible mess of things.
Code:
X = X + 1
variable equalSym variable operator number
OR
X = X + 1
ident equalSym ident operator number
Wow that IS way too much fooling around too!
Brunette: "Where were you born?"
Blonde: "The United States."
Brunette: "Which part?"
Blonde: "My whole body." Today's Joke
[$]> [$]>[$]>[$]>
Time to experiment with new idea for interpreter I am calling Bflat, I think we are all agreed we should play around with this stuff a bit, the book I linked to even said that! so I must be right.
I did pretty much what Stxaxtic did, although I read several books and countless papers on interpreter construction, when it came time to actually try writing one I put everything away, started writing up what I wanted it to do, how I wanted it to 'act', and what I thought was missing or a pain in a lot of other languages, and then I started with basic input and output and went from there. I ended up with an oddball language that has little resemblance to any established language, has no lexer (or scanner if you prefer), and has no restrictions on variable and label names other than no white space at the beginning or end of the name. It's not particularly elegant and there's a lot of repetitive code that I may work on one day, but it does what I wanted so I can't complain but so much.
I am talking about tokenizer not about
'main evaluator loop' dude ?
That is just proof that we (in general, including me)
have very little knowledge about this topic.
I can show you right now expresion evaluator written in Oxygen basic
which you cannot beat with your evaluation loop ...why?
simply because o2 is 5times faster than qb64 and the way how is
created is more efficient....yes that is !
you say scanner...
.scanner is just a part of tokenizer
in most cases loop or two loops which scan source code
char by char from given line of code.
when scanner match separator ( delimiter) he must know is a given
set of characters token or not..right?
ok i will show you very simple scanner as start:
Code:
'code: simple scanner in o2 by Aurel
' nLines  - number of source lines to scan
' linePos - index of the line scan() reads next (starts at 1)
' tErr    - scan() stops early when this is 1
int nLines,linePos=1, tErr=0
' srcArray holds one source line per element, buff collects the scanned lines,
' crlf is the CR+LF pair appended after each collected line
string srcArray[1000],buff,crlf = chr(13)+chr(10)
' scan: walks srcArray from linePos up to nLines, skipping blank lines (empty,
' a single TAB or a single space) and appending every other line plus crlf to
' buff. Stops early, leaving linePos on the offending line, when tErr is 1.
'
' Iterative on purpose: the earlier version recursed once per source line and
' its blank-line skip never compared linePos with nLines, so a trailing blank
' line made it read past the loaded lines.
Function scan()
  string srcLine
  While linePos <= nLines
    srcLine = srcArray[linePos]
    If srcLine <> "" AND srcLine <> chr(9) AND srcLine <> " "
      'Tokenize(srcLine) 'call tokenizer - not part of example!
      buff = buff + srcLine + crlf 'put string into test buffer
      If tErr = 1 Then Exit Function ' if tokenization error then EXIT
    End If
    linePos = linePos + 1 ' pos++
  Wend
End Function
'test --------------------------------------------------------------------
' Ten sample lines; the fourth is a single space and is skipped by scan().
nLines=10
srcArray={ "New line 1","New line 2","New line 3"," ","New line 5","New line 6","New line 7","New line 8","New line 9","New line 10"}
' Run the scanner and show everything it collected.
scan() : print buff
I hope you now get what I mean.
Can you replicate this in qb64? I think yes... with some small modifications.
The example assumes 10 lines of code stored in srcArray, so each element of the array holds one
line of code. Of course the code can be in one big string, but then you must split it on the
CRLF separator... ok?
Here is math evaluator from my Ruben Interpreter
which work fine but is also 'let say slow' with big math calculation.
Code:
'math parser
' VisibleToken: hook for turning CR, LF and the empty string into readable
' names for error messages. All three replacements are commented out, so the
' function currently returns s unchanged.
Function VisibleToken(STRING s) as string
Select s
Case CR
' s = "newline"
Case LF
' s = "newline"
Case ""
' s = "nothing"
End Select
Return s
End Function
';Cleanup then end
' Sets perr to 1 and returns 0. perr is declared outside this listing
' (presumably the parser's error flag - verify against the caller).
Function Finish()
perr=1
Return 0
End Function
';Report an error
' Prints "Error: <s>." and returns; it does not stop evaluation by itself.
Function Error(string s)
print "Error: " + s + "."
End Function
';Report an error and abort
' Prints the message via Error(), then calls Finish() to set perr.
' Control still returns to the caller afterwards.
Function Abort(STRING s)
Error(s)
Finish()
End Function
';Report what was expected
' Builds "Expected: <expect>, got '<current Look>'" and passes it to Abort().
' Always returns 0.
Function Expected(STRING expect)
Abort("Expected: " + expect + ", got '" + VisibleToken(Look) + "'")
Return 0
End Function
';Read a character into Look
' The Goto jumps over the subroutine body so it only runs when reached by
' Gosub / call.
Goto GetChar_End
sub GetChar
'print "SUB->GETCHAR"
again:
' Take the character at StreamPos from Stream and advance the position.
Look = Mid (Stream ,StreamPos,1)
'print "LOOK:" + Look
StreamPos = StreamPos + 1
' Spaces are never delivered to the parser: keep reading until Look is
' something else.
If Look= " " then goto again
Return
end sub
GetChar_End:
';Match a specific input character
' If the current Look equals s, advances to the next character; otherwise
' reports the mismatch through Expected() and does NOT advance.
Function Match(STRING s)
'print "MATCH<s>" + s
If Look <> s
'print "MATCH<Look>" + Look
Expected("'"+ s +"'")
Else
'print "MATCH<else>"
Gosub getchar
End If
End Function
';Get a number-----------------------------------------------------
' Collects consecutive digit (ASCII 48-57) and "." (ASCII 46) characters from
' the stream and returns Val() of the collected text.
' NOTE(review): any number of "." characters is accepted, and the first If
' block has no active statements (its only line is commented out).
Function GetNum() As Float
STRING Temp
If Asc(look) > 47 and Asc(look) < 58 ' read chars as numbers
'print "need number"
End if
While (Asc(Look) > 47 And Asc(Look) < 58) Or Asc(Look) = 46' dec.point
Temp = Temp + Look
Gosub getchar
Wend
Return Val(Temp)
End Function
';Get variable ----------------------------------------------------------
' Reads a name from the stream and returns its value: a plain variable, an
' array element, or - when the name is followed by "(" - a function result.
Function GetVar() as float
'print "SUB->GETVAR"
STRING Temp,func,expr
FLOAT tempv
'If Asc(look) < 96 and Asc(look) > 123 ' read chars as variable
' print ("need variable")
'end if
' Accepted name characters: ASCII 65-94 (A-Z and also [ \ ] ^, which lets
' array subscripts such as a[1] through), 97-122 (a-z) and 48-57 (digits).
While (Asc(Look) > 64 And Asc(Look) < 95) OR (Asc(Look) > 96 And Asc(Look) < 123) OR (Asc(Look) > 47 And Asc(Look) < 58) '; Works
Temp = Temp + Look
Gosub getchar
'print "GetVar-TEMP:" + Temp
'IF
Wend
'test variable value .........GetIntValue (byref vName as string) As FLOAT
'print "LOOK:" + Look
' Not followed by "(": look the name up as a variable or array element.
IF Look <> "("
If instr(Temp,"[") = 0
tempv = GetIntValue(Temp) 'not Array
Else ' is array
' print "TEMP:is array:" + Temp
tempv = GetArrayValue(Temp)
End if
'Return tempv
END IF
'expected function..................
IF Look = "("
'temp = sin(90)
'print "fTemp:" + temp ' sin
Gosub getchar 'skip bracket ( Look +1
' Collects everything up to the end of the stream (Look = ""), so expr also
' contains the closing ")" and any text after it.
' NOTE(review): confirm GetFunc expects the argument text in that form.
While Look <> "" ' ->...)
expr = expr + Look
Gosub getchar
Wend
'print "F->TEMP:" + expr ' 90
tempv=GetFunc(temp,expr)
'Return tempv
Gosub getchar
END IF
'...................................
Return tempv
End Function
Declare function Expression() as float
'==============================================================
' Factor: parses the smallest unit of an expression - a number, a
' variable/function reference, or a parenthesised sub-expression.
Function Factor() as float
'print "SUB->FACTOR"
FLOAT Value
'get number ----------------------------
If Asc(Look) > 47 And Asc(Look) < 58
Return GetNum()
End if
'get variable --------------------------
' Starts with a-z (ASCII 97-122) or A-Z (ASCII 65-90).
If Asc(look) > 96 and asc(look) < 123 OR (Asc(Look) > 64 And Asc(Look) < 91)
return GetVar()
End if
'get parens ---------------------------
' Anything else that is not end-of-stream must be "(" expression ")".
' At end of stream Value is returned as declared, without being assigned.
If look <> ""
Match("(")
Value = Expression()
Match(")")
End If
Return Value
End Function
'==============================================================
' Term: parses factor { (% | * | /) factor } and folds the operators from
' left to right.
' NOTE(review): "%" performs rounded integer division (Value \ modV), not a
' remainder, and no operator guards against a zero right-hand side.
Function Term() as float
FLOAT Value,modV
Value = Factor()
While (Look = "%" ) Or (Look = "*" ) Or (Look = "/")
'modulus -> integer division
' The three If blocks are independent, so more than one operator can be
' consumed in a single pass of the While loop.
If Look = "%"
Gosub getchar
modV=Factor()
Value = round(Value \modV)
End If
If Look = "*"
Gosub getchar
Value = Value * Factor()
End If
If Look = "/"
Gosub getchar
Value = Value / Factor()
End If
Wend
Return Value
End Function
' Expression: parses [-] term { (+ | -) term } and folds left to right.
' A leading "-" negates the first term only.
Function Expression() as float
FLOAT Value
If (Look = "-")
Gosub getchar
Value = -(Term())
Else
Value = Term()
End If
While (Look = "+") Or (Look = "-")
If Look = "+"
Gosub getchar
Value = Value + Term()
Else
' The While condition guarantees Look is "-" in this branch.
Gosub getchar
Value = Value - Term()
End If
Wend
Return Value
End Function
' EvaLLine: entry point of the math parser. Loads s into the shared stream,
' primes Look with the first non-space character and returns the value of the
' expression found there. Trailing unparsed text is not reported (the check is
' commented out).
Function EvaLLine(byval s as string) as float
'Stream = Replace(s, " ", "")
stream = s
StreamPos = 1
' Called with call syntax here; every other routine reaches it via Gosub.
getchar()
'string out
Float Tempv
Tempv = Expression()
'If StreamPos < Len(Stream) '; Error check, you
'Expected("nothing") '; can remove them if you don't
'EndIf
'out=str(Temp) '; need the check
Return Tempv
End Function
Aurel, something tells me we are talking past each other. Or at least one of us is. Before you decree any inferiorities in my work, beyond speed in certain situations as previously admitted, note that you do not come across as the actual expert here.
Didn't you abandon our conversation in 2015/6 when I cornered you on recursion in the Ruben interpreter? Pretty sure you owe me an example. Then I'll see your implementation of a combinator next.
Yes i was talking to you abut recursion but here is not question about that
i am talking about tokenizer and you constantly push into evaluator?
so i just add Ruben evaluator as example .
Recursion I was talking about with you , is connected with subroutines in Ruben
so we dont talk here about subroutines...right?
Expert heh...i am not expert i am just a hobby programmer with some
experience.
1.BUT what you say about given scanner?
2,Is that proper from my side?
Bill ...please if is not problem ..answer on concrete questions...
Didn't mean to derail the... discussion?... that was going on here. I tried to apologize for handing over a more-or-less completely unorthodox answer to the newbie's problem of how to structure their first interpreter. That said, I can't answer to the categorizations. Without knowing what you're asking specifically... But this isn't about me - so carry on, my bad.
Quote:Without knowing what you're asking specifically...
What i am asking ?????
I repeat so many times...so man read ...
I don't know if anyone here has ever written just a tokenizer,
so I am asking: what is the best way to create a tokenizer for a BASIC-like interpreter?
I have many sources, but they are written in C/C++/C# and use hashes, lists
and other complex structures...
Erik
Do you think that i don't know what recursion is..man-
So i will repeat again to you as i repeat to Bill...
do you ever made Tokenizer as separate program?
I know that both of you have experience
but i cannot understand why you ignore my question?
Is that so hard or i speak greek?
Sorry to admit it Aurel but this confusion can be traced to language barriers, no pun intended. Eoredson and I tried to explain ourselves, sorry again.
02-27-2018, 01:52 AM This post was last modified: 02-27-2018, 01:54 AM by bplus. Edited 0 times
(02-26-2018, 03:19 PM)Aurel Wrote:
Quote:Without knowing what you're asking specifically...
What i am asking ?????
I repeat so many times...so man read ...
I dont know if somone already or ever made just tokenizer
so i am asking HOW on best way create tokenizer for basic like interpreter??
I have many sources but are written in C/C++ C# and use hashes,lists
etc complex structures...
Hi Aurel,
I know I never made just a tokenizer before, that is sure thing, for sure.
Why would I until I see a need for one?
How can I see a need for one if I don't know what it is supposed to do.
I mean does it just go around putting labels on words, er, ah, the things found in a line of source code?
And wouldn't "the best way" depend on what it is to be used for?
02-27-2018, 09:08 AM This post was last modified: 02-27-2018, 09:09 AM by EdDavis. Edited 0 times
Quote:What i am asking ?????
I repeat so many times...so man read ...
I dont know if somone already or ever made just tokenizer
so i am asking HOW on best way create tokenizer for basic like interpreter??
I have many sources but are written in C/C++ C# and use hashes,lists
etc complex structures...
And:
Quote:Do you think that i don't know what recursion is..man-
So i will repeat again to you as i repeat to Bill...
do you ever made Tokenizer as separate program?
I know that both of you have experience
but i cannot understand why you ignore my question?
Is that so hard or i speak greek?
I thought it was Croatian, but what do I know?
There isn't a best way to tokenize a source file, but one way is below:
Some simple source code (in a C-like language) to be tokenized:
Code:
/*
Simple prime number generator
*/
count = 1;
n = 1;
limit = 100;
while (n < limit) {
k=3;
p=1;
n=n+2;
while ((k*k<=n) && (p)) {
p=n/k*k!=n;
k=k+2;
}
if (p) {
print(n, " is prime\n");
count = count + 1;
}
}
print("Total primes found: ", count, "\n");
' where we store keywords and variables
' One entry per known name: its spelling and the token kind it maps to.
' Token_type is declared outside this listing.
type Symbol
s_name as string
tok as Token_type
end type
' Symbol table; starts empty and grows by one entry per install() call.
dim shared symtab() as Symbol
' Lexer state shared by next_line / next_char / gettok:
' cur_line - text of the line being scanned (NewLine appended by next_line)
' cur_ch   - current character; "" means end of file
' line_num - number of the current line (init_lex starts it at 0)
' col_num  - position of cur_ch within cur_line, as used with mid()
dim shared cur_line as string
dim shared cur_ch as string
dim shared line_num as integer
dim shared col_num as integer
' is_digit: non-zero when ch compares within "0".."9" as a string, 0 otherwise
' (an empty string is not a digit).
function is_digit(byval ch as string) as long
    ' Reject anything that sorts below "0" first, then test the upper bound.
    if ch < "0" then return 0
    return ch <= "9"
end function
' is_alnum: non-zero when ch is a letter (case-insensitive "A".."Z") or a
' digit according to is_digit, 0 otherwise.
function is_alnum(byval ch as string) as long
    dim upper as string = ucase(ch)
    if upper >= "A" andalso upper <= "Z" then return -1
    return is_digit(ch)
end function
' error_msg: prints "(line:col) message", waits for a key press and terminates
' the program. It never returns to the caller.
'   eline, ecol - source position the message refers to
'   msg         - text of the diagnostic
sub error_msg(byval eline as integer, byval ecol as integer, byval msg as string)
    print "("; eline; ":"; ecol; ") "; msg
    ' Prompt previously read "Hit any to end program" (missing word).
    print : print "Hit any key to end program"
    sleep
    system
end sub
' install: appends (s_name, tok) to the symbol table and returns the index of
' the new entry. No duplicate check is made.
function install(byval s_name as string, byval tok as Token_type) as integer
    dim slot as integer
    slot = ubound(symtab) + 1
    redim preserve symtab(slot)
    with symtab(slot)
        .s_name = s_name
        .tok = tok
    end with
    return slot
end function
' lookup: returns the index of the first symbol-table entry whose name equals
' s_name, or -1 when there is none.
function lookup(byval s_name as string) as integer
    dim found as integer = -1
    dim idx as integer = lbound(symtab)
    do while idx <= ubound(symtab)
        if symtab(idx).s_name = s_name then
            found = idx
            exit do
        end if
        idx += 1
    loop
    return found
end function
' next_line: loads the next line of file #1 into cur_line (with NewLine
' appended), bumps line_num and resets col_num to 1. At end of file cur_line
' and cur_ch are left empty and the counters are untouched.
sub next_line()
    cur_ch = "" ' empty cur_ch means end-of-file
    if eof(1) then
        cur_line = ""
        exit sub
    end if
    line input #1, cur_line
    cur_line += NewLine
    line_num = line_num + 1
    col_num = 1
end sub
' next_char: advances to the next character, pulling in a new line when the
' current one is used up. cur_ch is "" when no character is available.
sub next_char()
    col_num = col_num + 1
    if col_num > len(cur_line) then next_line()
    ' Still past the end after the refill attempt means end of input.
    if col_num > len(cur_line) then
        cur_ch = ""
    else
        cur_ch = mid(cur_line, col_num, 1)
    end if
end sub
' follow: decides between a two-character and a one-character operator.
' When the current character equals expect it is consumed and ifyes is
' returned; otherwise ifno is returned, after reporting an error if ifno is
' tk_eoi (meaning the single-character form is not a valid token).
function follow(byval err_line as integer, byval err_col as integer, byval expect as string, byval ifyes as Token_type, byval ifno as Token_type) as Token_type
    if cur_ch <> expect then
        if ifno = tk_eoi then error_msg(err_line, err_col, "follow unrecognized character: " + cur_ch)
        return ifno
    end if
    next_char()
    return ifyes
end function
' gettok: reads the next token starting at cur_ch.
'   err_line, err_col (out) - position where the token starts
'   tok (out)               - kind of token found; tk_eoi at end of input
'   v (out)                 - text of the token; assigned only for character
'                             literals, strings, integers and identifiers
sub gettok(byref err_line as integer, byref err_col as integer, byref tok as Token_type, byref v as string)
' skip whitespace
do while (cur_ch = " " or cur_ch = chr(9) or cur_ch = NewLine) and (cur_ch <> "")
next_char()
loop
' Remember where the token begins before consuming any of it.
err_line = line_num
err_col = col_num
select case cur_ch
' Empty cur_ch is the end-of-file marker set by next_line / next_char.
case "": tok = tk_eoi: exit sub
' Single-character tokens: classify, step past the character, done.
case "{": tok = tk_lbrace: next_char(): exit sub
case "}": tok = tk_rbrace: next_char(): exit sub
case "(": tok = tk_lparen: next_char(): exit sub
case ")": tok = tk_rparen: next_char(): exit sub
case "+": tok = tk_add: next_char(): exit sub
case "-": tok = tk_sub: next_char(): exit sub
case "*": tok = tk_mul: next_char(): exit sub
case "%": tok = tk_Mod: next_char(): exit sub
case ";": tok = tk_semi: next_char(): exit sub
case ",": tok = tk_comma: next_char(): exit sub
case "/": ' div or comment
next_char()
if cur_ch <> "*" then
tok = tk_div
exit sub
end if
' skip comments
next_char()
do
if cur_ch = "*" then
next_char()
if cur_ch = "/" then
next_char()
' Comment closed: the token is whatever follows, so scan again.
gettok(err_line, err_col, tok, v)
exit sub
end if
elseif cur_ch = "" then error_msg(err_line, err_col, "EOF in comment")
else
next_char()
end if
loop
case "'": ' single char literals
next_char()
' v holds the character code as text; the token kind is tk_integer.
v = str(asc(cur_ch))
if cur_ch = "'" then error_msg(err_line, err_col, "empty character constant")
if cur_ch = BackSlash then
' Only the escapes \n (code 10) and \\ (code 92) are recognised.
next_char()
if cur_ch = "n" then
v = "10"
elseif cur_ch = BackSlash then
v = "92"
else error_msg(err_line, err_col, "unknown escape sequence: " + cur_ch)
end if
end if
next_char()
if cur_ch <> "'" then error_msg(err_line, err_col, "multi-character constant")
next_char()
tok = tk_integer
exit sub
' One- or two-character operators: follow() checks the second character.
case "<": next_char(): tok = follow(err_line, err_col, "=", tk_Leq, tk_Lss): exit sub
case ">": next_char(): tok = follow(err_line, err_col, "=", tk_Geq, tk_Gtr): exit sub
case "!": next_char(): tok = follow(err_line, err_col, "=", tk_Neq, tk_Not): exit sub
case "=": next_char(): tok = follow(err_line, err_col, "=", tk_Eq, tk_Assign): exit sub
' A lone "&" or "|" is not a token: passing tk_EOI as ifno makes follow()
' report an error.
case "&": next_char(): tok = follow(err_line, err_col, "&", tk_And, tk_EOI): exit sub
case "|": next_char(): tok = follow(err_line, err_col, "|", tk_Or, tk_EOI): exit sub
case DoubleQuote: ' string
' v keeps both surrounding quote characters.
v = cur_ch
next_char()
do while cur_ch <> DoubleQuote
if cur_ch = NewLine then error_msg(err_line, err_col, "EOL in string")
if cur_ch = "" then error_msg(err_line, err_col, "EOF in string")
v += cur_ch
next_char()
loop
v += cur_ch
next_char()
tok = tk_string
exit sub
case else ' integers or identifiers
' is_number stays true only while every collected character is a digit.
dim is_number as boolean = is_digit(cur_ch)
v = ""
do while is_alnum(cur_ch) orelse cur_ch = "_"
if not is_digit(cur_ch) then is_number = false
v += cur_ch
next_char()
loop
if len(v) = 0 then error_msg(err_line, err_col, "unknown character: " + cur_ch)
' Starts with a digit: must be all digits, otherwise it is malformed.
if is_digit(mid(v, 1, 1)) then
if not is_number then error_msg(err_line, err_col, "invalid number: " + v)
tok = tk_integer
exit sub
end if
' Names found in the symbol table are keywords; the rest are identifiers.
dim as integer index = lookup(v)
if index = -1 then
tok = tk_ident
else
tok = symtab(index).tok
end if
exit sub
end select
end sub
' init_lex: registers the language keywords, opens the source file as file #1
' and reads the first character so gettok() can start immediately.
'   filein - path of the source file to tokenize
' Terminates through error_msg() when the file cannot be opened; previously a
' failed open went unnoticed and the input was treated as empty.
sub init_lex(byval filein as string)
    install("else", tk_else)
    install("if", tk_if)
    install("print", tk_print)
    install("putc", tk_putc)
    install("while", tk_while)

    ' The function form of Open returns 0 on success.
    if open(filein for input as #1) <> 0 then
        error_msg(0, 0, "cannot open file: " + filein)
    end if

    cur_line = ""
    line_num = 0
    col_num = 0
    next_char()
end sub
' scanner: prints one line per token - start line, start column, token name
' and, for integers, identifiers and strings, the token text - until the
' end-of-input token has been printed.
sub scanner()
    dim err_line as integer
    dim err_col as integer
    dim tok as Token_type
    dim v as string
    dim tok_list(tk_eoi to tk_string) as string

    ' Display names, indexed by token kind. The table was previously declared
    ' but never filled, so the name column printed blank. Kinds in the range
    ' that the lexer never produces are left empty.
    tok_list(tk_eoi) = "End_of_input"
    tok_list(tk_mul) = "Op_multiply"
    tok_list(tk_div) = "Op_divide"
    tok_list(tk_mod) = "Op_mod"
    tok_list(tk_add) = "Op_add"
    tok_list(tk_sub) = "Op_subtract"
    tok_list(tk_not) = "Op_not"
    tok_list(tk_lss) = "Op_less"
    tok_list(tk_leq) = "Op_lessequal"
    tok_list(tk_gtr) = "Op_greater"
    tok_list(tk_geq) = "Op_greaterequal"
    tok_list(tk_eq) = "Op_equal"
    tok_list(tk_neq) = "Op_notequal"
    tok_list(tk_assign) = "Op_assign"
    tok_list(tk_and) = "Op_and"
    tok_list(tk_or) = "Op_or"
    tok_list(tk_if) = "Keyword_if"
    tok_list(tk_else) = "Keyword_else"
    tok_list(tk_while) = "Keyword_while"
    tok_list(tk_print) = "Keyword_print"
    tok_list(tk_putc) = "Keyword_putc"
    tok_list(tk_lparen) = "LeftParen"
    tok_list(tk_rparen) = "RightParen"
    tok_list(tk_lbrace) = "LeftBrace"
    tok_list(tk_rbrace) = "RightBrace"
    tok_list(tk_semi) = "Semicolon"
    tok_list(tk_comma) = "Comma"
    tok_list(tk_ident) = "Identifier"
    tok_list(tk_integer) = "Integer"
    tok_list(tk_string) = "String"

    do
        gettok(err_line, err_col, tok, v)
        ' The \...\ field is 2 + (number of spaces) characters wide. The spaces
        ' are generated with space() so the 16-character column survives
        ' whitespace-collapsing editors; the longest name is 15 characters.
        print using "##### ##### \ " + space(13) + BackSlash; err_line; err_col; tok_list(tok);
        if tok = tk_integer orelse tok = tk_ident orelse tok = tk_string then print " " + v;
        print
    loop until tok = tk_eoi
end sub
' main: requires the source file name as the first command-line argument,
' then initialises the lexer and prints the token listing.
sub main()
    dim src as string = command(1)
    if len(src) = 0 then
        print "filename required"
        exit sub
    end if
    init_lex(src)
    scanner()
end sub

' Program entry point.
main()
system
And the result: Line number, column number, token, and any attributes (in this sample, just an identifier, string, or integer):