This article is a set of notes I took while reading GitHub - mattn/anko: Scriptable interpreter written in golang. The notes are written as comments added on top of parser/lexer.go and are shown below as a diff.
diff --git parser/lexer.go parser/lexer.go
// Package parser implements parser for anko.
package parser
import (
"errors"
"fmt"
"unicode"
"github.com/mattn/anko/ast"
)
const (
// EOF is short for End of file.
+ // End of file: returned by peek() once the scanner runs past the end of src.
EOF = -1
// EOL is short for End of line.
+ // End of line: the newline rune '\n'.
EOL = '\n'
)
// Error provides a convenient interface for handling runtime error.
// It can be Error interface with type cast which can call Pos().
type Error struct {
Message string
Pos ast.Position
Filename string
Fatal bool
}
// Error returns the error message.
func (e *Error) Error() string {
return e.Message
}
// Scanner stores informations for lexer.
type Scanner struct {
- src []rune
+ src []rune
+ // current scan position: index into src
offset int
lineHead int
line int
}
// opName is correction of operation names.
+// The keyword table: after scanning an identifier, Scan checks this map first.
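+// e.g. "func" is tokenized as FUNC, while an identifier such as "print"
+// is not in this map and falls back to IDENT.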
var opName = map[string]int{
"func": FUNC,
"return": RETURN,
"var": VAR,
"throw": THROW,
"if": IF,
"for": FOR,
"break": BREAK,
"continue": CONTINUE,
"in": IN,
"else": ELSE,
"new": NEW,
"true": TRUE,
"false": FALSE,
"nil": NIL,
"module": MODULE,
"try": TRY,
"catch": CATCH,
"finally": FINALLY,
"switch": SWITCH,
"case": CASE,
"default": DEFAULT,
"go": GO,
"chan": CHAN,
"make": MAKE,
"type": TYPE,
"len": LEN,
"delete": DELETE,
}
// Init resets code to scan.
+// Init: convert the source string into []rune for scanning.
func (s *Scanner) Init(src string) {
s.src = []rune(src)
}
// Scan analyses token, and decide identify or literals.
+// Scan is the main scanning loop: it returns the next token, its literal and its position.
func (s *Scanner) Scan() (tok int, lit string, pos ast.Position, err error) {
retry:
- s.skipBlank()
+ s.skipBlank() // skip blanks (' ', '\t', '\r')
pos = s.pos()
switch ch := s.peek(); {
case isLetter(ch):
+ // A letter starts an identifier: read it with scanIdentifier.
lit, err = s.scanIdentifier()
if err != nil {
return
}
+
+ // Is it a registered keyword?
if name, ok := opName[lit]; ok {
tok = name
} else {
+ // IDENT
tok = IDENT
}
case isDigit(ch):
+ // A digit starts a number literal: NUMBER.
tok = NUMBER
lit, err = s.scanNumber()
if err != nil {
return
}
case ch == '"':
+ // 如果是 ",扫描下一个",并设置为string
tok = STRING
lit, err = s.scanString('"')
if err != nil {
return
}
case ch == '\'':
+ // A '\'' starts a string: scan up to the closing '\'' and emit STRING.
tok = STRING
lit, err = s.scanString('\'')
if err != nil {
return
}
case ch == '`':
+ // A '`' starts a raw string: scan up to the closing '`' and emit STRING.
tok = STRING
lit, err = s.scanRawString()
if err != nil {
return
}
default:
+ // The remaining cases have one thing in common: they all need the s.next() at the bottom of this switch.
switch ch {
case EOF:
+ // end of file
tok = EOF
case '#':
+ // A '#' comment: throw away the rest of the line, then retry.
for !isEOL(s.peek()) {
s.next()
}
goto retry
case '!':
s.next()
switch s.peek() {
case '=':
+ // `!=`
tok = NEQ
lit = "!="
default:
+ // `!`
s.back()
tok = int(ch)
lit = string(ch)
}
case '=':
s.next()
switch s.peek() {
case '=':
+ // `==`
tok = EQEQ
lit = "=="
default:
+ // `=`
s.back()
tok = int(ch)
lit = string(ch)
}
case '+':
s.next()
switch s.peek() {
case '+':
+ // `++`
tok = PLUSPLUS
lit = "++"
case '=':
+ // `+=`
tok = PLUSEQ
lit = "+="
default:
+ // `+`
s.back()
tok = int(ch)
lit = string(ch)
}
case '-':
s.next()
switch s.peek() {
case '-':
+ // `--`
tok = MINUSMINUS
lit = "--"
case '=':
+ // `-=`
tok = MINUSEQ
lit = "-="
default:
+ // `-`
s.back()
tok = int(ch)
lit = string(ch)
}
case '*':
s.next()
switch s.peek() {
case '*':
+ // `**`
tok = POW
lit = "**"
case '=':
+ // `*=`
tok = MULEQ
lit = "*="
default:
+ // `*`
s.back()
tok = int(ch)
lit = string(ch)
}
case '/':
s.next()
switch s.peek() {
case '=':
+ // `/=`
tok = DIVEQ
lit = "/="
default:
+ // `/`
s.back()
tok = int(ch)
lit = string(ch)
}
case '>':
s.next()
switch s.peek() {
case '=':
+ // `>=`
tok = GE
lit = ">="
case '>':
+ // `>>`
tok = SHIFTRIGHT
lit = ">>"
default:
+ // `>`
s.back()
tok = int(ch)
lit = string(ch)
}
case '<':
s.next()
switch s.peek() {
case '-':
+ // `<-`
tok = OPCHAN
lit = "<-"
case '=':
+ // `<=`
tok = LE
lit = "<="
case '<':
+ // `<<`
tok = SHIFTLEFT
lit = "<<"
default:
+ // `<`
s.back()
tok = int(ch)
lit = string(ch)
}
case '|':
s.next()
switch s.peek() {
case '|':
+ // `||`
tok = OROR
lit = "||"
case '=':
+ // `|=`
tok = OREQ
lit = "|="
default:
+ // `|`
s.back()
tok = int(ch)
lit = string(ch)
}
case '&':
s.next()
switch s.peek() {
case '&':
+ // `&&`
tok = ANDAND
lit = "&&"
case '=':
+ // `&=`
tok = ANDEQ
lit = "&="
default:
+ // `&`
s.back()
tok = int(ch)
lit = string(ch)
}
case '.':
s.next()
if s.peek() == '.' {
s.next()
if s.peek() == '.' {
+ // `...`
tok = VARARG
} else {
+ // a bare `..` is a syntax error
err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column)
return
}
} else {
+ // `.`
s.back()
tok = int(ch)
lit = string(ch)
}
case '\n', '(', ')', ':', ';', '%', '?', '{', '}', '[', ']', ',', '^':
+ // None of these combine with a following character into a longer operator, so just return `int(ch)`.
tok = int(ch)
lit = string(ch)
default:
+ // error
err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column)
tok = int(ch)
lit = string(ch)
return
}
s.next()
}
return
}
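+// Example (my note, not part of lexer.go): driving the scanner by hand.
+// Assuming the goyacc-generated token constants, the input "a != 10"
+// produces IDENT "a", NEQ "!=", NUMBER "10" and finally EOF:
+//
+//	var s Scanner
+//	s.Init("a != 10")
+//	for {
+//		tok, lit, _, _ := s.Scan()
+//		if tok == EOF {
+//			break
+//		}
+//		fmt.Println(tok, lit)
+//	}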
// isLetter returns true if the rune is a letter for identity.
+// Is it a letter: unicode.IsLetter(ch) || ch == '_'
func isLetter(ch rune) bool {
return unicode.IsLetter(ch) || ch == '_'
}
// isDigit returns true if the rune is a number.
+// Is it a decimal digit: '0'..'9'
func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9'
}
// isHex returns true if the rune is a hex digits.
+// Is it a hex digit: '0'..'9' || 'a'..'f' || 'A'..'F'
func isHex(ch rune) bool {
return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
}
// isEOL returns true if the rune is at end-of-line or end-of-file.
+// Is it end of line or end of file: ch == '\n' || ch == -1 (EOF)
func isEOL(ch rune) bool {
return ch == '\n' || ch == -1
}
// isBlank returns true if the rune is empty character..
+// Is it blank: ' ', '\t' or '\r' (note: '\n' is not blank, it is a token of its own)
func isBlank(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\r'
}
// peek returns current rune in the code.
+// Returns EOF when past the end of the source, otherwise the rune at the current offset.
func (s *Scanner) peek() rune {
if s.reachEOF() {
return EOF
}
return s.src[s.offset]
}
// next moves offset to next.
+// If the end of file has not been reached:
+//   when the current rune is '\n', record where the next line starts
+//   (s.lineHead = s.offset + 1) and increment s.line;
+//   then advance s.offset by one.
func (s *Scanner) next() {
if !s.reachEOF() {
if s.peek() == '\n' {
s.lineHead = s.offset + 1
s.line++
}
s.offset++
}
}
// current returns the current offset.
+// Returns the current offset.
func (s *Scanner) current() int {
return s.offset
}
// offset sets the offset value.
+// Sets the current offset.
func (s *Scanner) set(o int) {
s.offset = o
}
// back moves back offset once to top.
+// Moves the offset back by one rune.
func (s *Scanner) back() {
s.offset--
}
// reachEOF returns true if offset is at end-of-file.
+// Whether the end of file has been reached: offset >= len(src)
func (s *Scanner) reachEOF() bool {
return len(s.src) <= s.offset
}
// pos returns the position of current.
+// Returns the ast.Position of the current offset:
+//   Line:   s.line + 1
+//   Column: s.offset - s.lineHead + 1
func (s *Scanner) pos() ast.Position {
return ast.Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1}
}
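+// Example (my note): for src "ab\ncd", once next() has consumed the '\n'
+// the scanner sits on 'c' with offset=3, lineHead=3 and line=1,
+// so pos() reports Line 2, Column 1.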
// skipBlank moves position into non-black character.
+// Skips blanks: ' ', '\t' and '\r' (not '\n', which is a token of its own).
func (s *Scanner) skipBlank() {
for isBlank(s.peek()) {
s.next()
}
}
// scanIdentifier returns identifier beginning at current position.
+// Returns the longest run of letters and digits starting at the current position.
func (s *Scanner) scanIdentifier() (string, error) {
var ret []rune
for {
if !isLetter(s.peek()) && !isDigit(s.peek()) {
break
}
ret = append(ret, s.peek())
s.next()
}
return string(ret), nil
}
// scanNumber returns number beginning at current position.
+// Returns the number literal starting at the current position.
func (s *Scanner) scanNumber() (string, error) {
var ret []rune
ch := s.peek()
ret = append(ret, ch)
s.next()
if ch == '0' && s.peek() == 'x' {
ret = append(ret, s.peek())
s.next()
for isHex(s.peek()) {
ret = append(ret, s.peek())
s.next()
}
} else {
for isDigit(s.peek()) || s.peek() == '.' {
ret = append(ret, s.peek())
s.next()
}
if s.peek() == 'e' {
ret = append(ret, s.peek())
s.next()
if isDigit(s.peek()) || s.peek() == '+' || s.peek() == '-' {
ret = append(ret, s.peek())
s.next()
for isDigit(s.peek()) || s.peek() == '.' {
ret = append(ret, s.peek())
s.next()
}
}
for isDigit(s.peek()) || s.peek() == '.' {
ret = append(ret, s.peek())
s.next()
}
}
if isLetter(s.peek()) {
return "", errors.New("identifier starts immediately after numeric literal")
}
}
return string(ret), nil
}
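+// Example (my note): scanNumber accepts literals such as 255, 0xff, 1.5 and 1e-3,
+// and rejects 123abc with "identifier starts immediately after numeric literal".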
// scanRawString returns raw-string starting at current position.
+// Raw string: returns everything from the current position up to the closing '`', with no escape handling.
func (s *Scanner) scanRawString() (string, error) {
var ret []rune
for {
s.next()
if s.peek() == EOF {
return "", errors.New("unexpected EOF")
}
if s.peek() == '`' {
s.next()
break
}
ret = append(ret, s.peek())
}
return string(ret), nil
}
// scanString returns string starting at current position.
// This handles backslash escaping.
+// Scans until the given delimiter; e.g. scanString('"') returns everything up to the closing '"', expanding backslash escapes along the way.
func (s *Scanner) scanString(l rune) (string, error) {
var ret []rune
eos:
for {
s.next()
switch s.peek() {
case EOL:
return "", errors.New("unexpected EOL")
case EOF:
return "", errors.New("unexpected EOF")
case l:
s.next()
break eos
case '\\':
s.next()
switch s.peek() {
case 'b':
ret = append(ret, '\b')
continue
case 'f':
ret = append(ret, '\f')
continue
case 'r':
ret = append(ret, '\r')
continue
case 'n':
ret = append(ret, '\n')
continue
case 't':
ret = append(ret, '\t')
continue
}
ret = append(ret, s.peek())
continue
default:
ret = append(ret, s.peek())
}
}
return string(ret), nil
}
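+// Example (my note): for the source "a\tb" in double quotes, scanString('"')
+// returns 'a', a tab and 'b'; \" yields a literal '"' without closing the string,
+// and an unrecognized escape such as \q simply keeps the escaped character.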
// Lexer provides interface to parse codes.
type Lexer struct {
s *Scanner
lit string
pos ast.Position
e error
stmts []ast.Stmt
}
// Lex scans the token and literals.
+// Lexer implements the yyLexer interface expected by the goyacc-generated parser; yyParse calls Lex repeatedly to pull tokens.
func (l *Lexer) Lex(lval *yySymType) int {
+ // Scan one token.
tok, lit, pos, err := l.s.Scan()
if err != nil {
l.e = &Error{Message: err.Error(), Pos: pos, Fatal: true}
}
lval.tok = ast.Token{Tok: tok, Lit: lit}
lval.tok.SetPosition(pos)
l.lit = lit
l.pos = pos
return tok
}
// Error sets parse error.
func (l *Lexer) Error(msg string) {
l.e = &Error{Message: msg, Pos: l.pos, Fatal: false}
}
// Parse provides way to parse the code using Scanner.
func Parse(s *Scanner) ([]ast.Stmt, error) {
l := Lexer{s: s}
if yyParse(&l) != 0 {
return nil, l.e
}
+
return l.stmts, l.e
}
// EnableErrorVerbose enabled verbose errors from the parser
+// Turns on verbose error messages from the generated parser.
func EnableErrorVerbose() {
yyErrorVerbose = true
}
// ParseSrc provides way to parse the code from source.
+// The entry point for parsing; see the usage sketch after this diff.
func ParseSrc(src string) ([]ast.Stmt, error) {
+ // Build a new Scanner over the source.
scanner := &Scanner{
src: []rune(src),
}
- return Parse(scanner)
+ b, err := Parse(scanner)
+ return b, err
}