// Copyright 2016 PingCAP, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // See the License for the specific language governing permissions and // limitations under the License. // Copyright 2014 The goyacc Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // This source code uses portions of code previously published in the Go tool // yacc[0] program, the respective license can be found in the LICENSE-GO-YACC // file. // Goyacc is a version of yacc generating Go parsers. // // Usage // // Note: If no non flag arguments are given, goyacc reads standard input. // // goyacc [options] [input] // // options and (defaults) // -c Report state closures. (false) // -cr Check all states are reducible. (false) // -dlval Debug value when runtime yyDebug >= 3. ("lval") // -dlvalf Debug format of -dlval. ("%+v") // -ex Explain how were conflicts resolved. (false) // -l Disable line directives, for compatibility only - ignored. (false) // -la Report all lookahead sets. (false) // -o outputFile Parser output. ("y.go") // -p prefix Name prefix to use in generated code. ("yy") // -v reportFile Create grammar report. ("y.output") // -xe examplesFile Generate error messages by examples. ("") // -xegen examplesFile Generate a file suitable for -xe automatically from the grammar. // The file must not exist. ("") // // // // Changelog // // 2015-03-24: The search for a custom error message is now extended to include // also the last state that was shifted into, if any. This change resolves a // problem in which a lookahead symbol is valid for a reduce action in state A, // but the same symbol is later never accepted by any shift action in some // state B which is popped from the state stack after the reduction is // performed. The computed from example state is A but when the error is // actually detected, the state is now B and the custom error was thus not // used. // // 2015-02-23: Added -xegen flag. It can be used to automagically generate a // skeleton errors by example file which can be, for example, edited and/or // submited later as an argument of the -xe option. // // 2014-12-18: Support %precedence for better bison compatibility[3]. The // actual changes are in packages goyacc is dependent on. Goyacc users should // rebuild the binary: // // $ go get -u github.com/cznic/goyacc // // 2014-12-02: Added support for the optional yyLexerEx interface. The Reduced // method can be useful for debugging and/or automatically producing examples // by parsing code fragments. If it returns true the parser exits immediately // with return value -1. // // Overview // // The generated parser is reentrant and mostly backwards compatible with // parsers generated by go tool yacc[0]. yyParse expects to be given an // argument that conforms to the following interface: // // type yyLexer interface { // Lex(lval *yySymType) int // Errorf(format string, a ...interface{}) // Errors() (warns []error, errs []error) // } // // Optionally the argument to yyParse may implement the following interface: // // type yyLexerEx interface { // yyLexer // // Hook for recording a reduction. // Reduced(rule, state int, lval *yySymType) (stop bool) // Client should copy *lval. // } // // Lex should return the token identifier, and place other token information in // lval (which replaces the usual yylval). Error is equivalent to yyerror in // the original yacc. // // Code inside the parser may refer to the variable yylex, which holds the // yyLexer passed to Parse. // // Multiple grammars compiled into a single program should be placed in // distinct packages. If that is impossible, the "-p prefix" flag to yacc sets // the prefix, by default yy, that begins the names of symbols, including // types, the parser, and the lexer, generated and referenced by yacc's // generated code. Setting it to distinct values allows multiple grammars to be // placed in a single package. // // Differences wrt go tool yacc // // - goyacc implements ideas from "Generating LR Syntax Error Messages from // Examples"[1]. Use the -xe flag to pass a name of the example file. For more // details about the example format please see [2]. // // - The grammar report includes example token sequences leading to the // particular state. Can help understanding conflicts. // // - Minor changes in parser debug output. // // Links // // Referenced from elsewhere: // // [0]: http://golang.org/cmd/yacc/ // [1]: http://people.via.ecp.fr/~stilgar/doc/compilo/parser/Generating%20LR%20Syntax%20Error%20Messages.pdf // [2]: http://godoc.org/github.com/cznic/y#hdr-Error_Examples // [3]: http://www.gnu.org/software/bison/manual/html_node/Precedence-Only.html#Precedence-Only package main import ( "bufio" "bytes" "flag" "fmt" "go/format" "go/scanner" "go/token" "io" "io/ioutil" "log" "os" "runtime" "sort" "strings" "github.com/cznic/mathutil" parser "github.com/cznic/parser/yacc" "github.com/cznic/sortutil" "github.com/cznic/strutil" "github.com/cznic/y" ) var ( //oNoDefault = flag.Bool("nodefault", false, "disable generating $default actions") oClosures = flag.Bool("c", false, "report state closures") oReducible = flag.Bool("cr", false, "check all states are reducible") oDlval = flag.String("dlval", "lval", "debug value (runtime yyDebug >= 3)") oDlvalf = flag.String("dlvalf", "%+v", "debug format of -dlval (runtime yyDebug >= 3)") oLA = flag.Bool("la", false, "report all lookahead sets") oNoLines = flag.Bool("l", false, "disable line directives (for compatibility ony - ignored)") oOut = flag.String("o", "y.go", "parser output") oPref = flag.String("p", "yy", "name prefix to use in generated code") oReport = flag.String("v", "y.output", "create grammar report") oResolved = flag.Bool("ex", false, "explain how were conflicts resolved") oXErrors = flag.String("xe", "", "generate eXtra errors from examples source file") oXErrorsGen = flag.String("xegen", "", "generate error from examples source file automatically from the grammar") ) func main() { log.SetFlags(0) defer func() { _, file, line, ok := runtime.Caller(2) if e := recover(); e != nil { switch { case ok: log.Fatalf("%s:%d: panic: %v", file, line, e) default: log.Fatalf("panic: %v", e) } } }() flag.Parse() var in string switch flag.NArg() { case 0: in = os.Stdin.Name() case 1: in = flag.Arg(0) default: log.Fatal("expected at most one non flag argument") } if err := main1(in); err != nil { switch x := err.(type) { case scanner.ErrorList: for _, v := range x { fmt.Fprintf(os.Stderr, "%v\n", v) } os.Exit(1) default: log.Fatal(err) } } } type symUsed struct { sym *y.Symbol used int } type symsUsed []symUsed func (s symsUsed) Len() int { return len(s) } func (s symsUsed) Swap(i, j int) { s[i], s[j] = s[j], s[i] } func (s symsUsed) Less(i, j int) bool { if s[i].used > s[j].used { return true } if s[i].used < s[j].used { return false } caseFoldedCompare := strings.Compare(strings.ToLower(s[i].sym.Name), strings.ToLower(s[j].sym.Name)) if caseFoldedCompare < 0 { return true } if caseFoldedCompare > 0 { return false } return s[i].sym.Name < s[j].sym.Name } func main1(in string) (err error) { var out io.Writer if nm := *oOut; nm != "" { var f *os.File var e error if f, err = os.Create(nm); err != nil { return err } defer func() { if e1 := f.Close(); e1 != nil && err == nil { err = e1 } }() w := bufio.NewWriter(f) defer func() { if e1 := w.Flush(); e1 != nil && err == nil { err = e1 } }() buf := bytes.NewBuffer(nil) out = buf defer func() { var dest []byte if dest, e = format.Source(buf.Bytes()); e != nil { dest = buf.Bytes() } if _, e = w.Write(dest); e != nil && err == nil { err = e } }() } var rep io.Writer if nm := *oReport; nm != "" { f, err1 := os.Create(nm) if err1 != nil { return err1 } defer func() { if e := f.Close(); e != nil && err == nil { err = e } }() w := bufio.NewWriter(f) defer func() { if e := w.Flush(); e != nil && err == nil { err = e } }() rep = w } var xerrors []byte if nm := *oXErrors; nm != "" { b, err1 := ioutil.ReadFile(nm) if err1 != nil { return err1 } xerrors = b } p, err := y.ProcessFile(token.NewFileSet(), in, &y.Options{ //NoDefault: *oNoDefault, AllowConflicts: true, Closures: *oClosures, LA: *oLA, Reducible: *oReducible, Report: rep, Resolved: *oResolved, XErrorsName: *oXErrors, XErrorsSrc: xerrors, }) if err != nil { return err } if fn := *oXErrorsGen; fn != "" { f, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE, 0666) if err != nil { return err } b := bufio.NewWriter(f) if err := p.SkeletonXErrors(b); err != nil { return err } if err := b.Flush(); err != nil { return err } if err := f.Close(); err != nil { return err } } msu := make(map[*y.Symbol]int, len(p.Syms)) // sym -> usage for nm, sym := range p.Syms { if nm == "" || nm == "ε" || nm == "$accept" || nm == "#" { continue } msu[sym] = 0 } var minArg, maxArg int for _, state := range p.Table { for _, act := range state { msu[act.Sym]++ k, arg := act.Kind() if k == 'a' { continue } if k == 'r' { arg = -arg } minArg, maxArg = mathutil.Min(minArg, arg), mathutil.Max(maxArg, arg) } } su := make(symsUsed, 0, len(msu)) for sym, used := range msu { su = append(su, symUsed{sym, used}) } sort.Sort(su) // ----------------------------------------------------------- Prologue f := strutil.IndentFormatter(out, "\t") mustFormat(f, "// CAUTION: Generated file - DO NOT EDIT.\n\n") mustFormat(f, "%s", injectImport(p.Prologue)) mustFormat(f, ` type %[1]sSymType %i%s%u type %[1]sXError struct { state, xsym int } `, *oPref, p.UnionSrc) // ---------------------------------------------------------- Constants nsyms := map[string]*y.Symbol{} a := make([]string, 0, len(msu)) maxTokName := 0 for sym := range msu { nm := sym.Name if nm == "$default" || nm == "$end" || sym.IsTerminal && nm[0] != '\'' && sym.Value > 0 { maxTokName = mathutil.Max(maxTokName, len(nm)) a = append(a, nm) } nsyms[nm] = sym } sort.Strings(a) mustFormat(f, "\nconst (%i\n") for _, v := range a { nm := v switch nm { case "error": nm = *oPref + "ErrCode" case "$default": nm = *oPref + "Default" case "$end": nm = *oPref + "EOFCode" } mustFormat(f, "%s%s = %d\n", nm, strings.Repeat(" ", maxTokName-len(nm)+1), nsyms[v].Value) } minArg-- // eg: [-13, 42], minArg -14 maps -13 to 1 so zero cell values -> empty. mustFormat(f, "\n%sMaxDepth = 200\n", *oPref) mustFormat(f, "%sTabOfs = %d\n", *oPref, minArg) mustFormat(f, "%u)") // ---------------------------------------------------------- Variables mustFormat(f, "\n\nvar (%i\n") // Lex translation table mustFormat(f, "%sXLAT = map[int]int{%i\n", *oPref) xlat := make(map[int]int, len(su)) var errSym int for i, v := range su { if v.sym.Name == "error" { errSym = i } xlat[v.sym.Value] = i mustFormat(f, "%6d: %3d, // %s (%dx)\n", v.sym.Value, i, v.sym.Name, msu[v.sym]) } mustFormat(f, "%u}\n") // Symbol names mustFormat(f, "\n%sSymNames = []string{%i\n", *oPref) for _, v := range su { mustFormat(f, "%q,\n", v.sym.Name) } mustFormat(f, "%u}\n") // Reduction table mustFormat(f, "\n%sReductions = []struct{xsym, components int}{%i\n", *oPref) for _, rule := range p.Rules { mustFormat(f, "{%d, %d},\n", xlat[rule.Sym.Value], len(rule.Components)) } mustFormat(f, "%u}\n") // XError table mustFormat(f, "\n%[1]sXErrors = map[%[1]sXError]string{%i\n", *oPref) for _, xerr := range p.XErrors { state := xerr.Stack[len(xerr.Stack)-1] xsym := -1 if xerr.Lookahead != nil { xsym = xlat[xerr.Lookahead.Value] } mustFormat(f, "%[1]sXError{%d, %d}: \"%s\",\n", *oPref, state, xsym, xerr.Msg) } mustFormat(f, "%u}\n\n") // Parse table tbits := 32 switch n := mathutil.BitLen(maxArg - minArg + 1); { case n < 8: tbits = 8 case n < 16: tbits = 16 } mustFormat(f, "%sParseTab = [%d][]uint%d{%i\n", *oPref, len(p.Table), tbits) nCells := 0 var tabRow sortutil.Uint64Slice for si, state := range p.Table { tabRow = tabRow[:0] max := 0 for _, act := range state { sym := act.Sym xsym, ok := xlat[sym.Value] if !ok { panic("internal error 001") } max = mathutil.Max(max, xsym) kind, arg := act.Kind() switch kind { case 'a': arg = 0 case 'r': arg *= -1 } tabRow = append(tabRow, uint64(xsym)<<32|uint64(arg-minArg)) } nCells += max tabRow.Sort() col := -1 if si%5 == 0 { mustFormat(f, "// %d\n", si) } mustFormat(f, "{") for i, v := range tabRow { xsym := int(uint32(v >> 32)) arg := int(uint32(v)) if col+1 != xsym { mustFormat(f, "%d: ", xsym) } switch { case i == len(tabRow)-1: mustFormat(f, "%d", arg) default: mustFormat(f, "%d, ", arg) } col = xsym } mustFormat(f, "},\n") } mustFormat(f, "%u}\n") fmt.Fprintf(os.Stderr, "Parse table entries: %d of %d, x %d bits == %d bytes\n", nCells, len(p.Table)*len(msu), tbits, nCells*tbits/8) if n := p.ConflictsSR; n != 0 { fmt.Fprintf(os.Stderr, "conflicts: %d shift/reduce\n", n) } if n := p.ConflictsRR; n != 0 { fmt.Fprintf(os.Stderr, "conflicts: %d reduce/reduce\n", n) } mustFormat(f, `%u) var %[1]sDebug = 0 type %[1]sLexer interface { Lex(lval *%[1]sSymType) int Errorf(format string, a ...interface{}) error AppendError(err error) Errors() (warns []error, errs []error) } type %[1]sLexerEx interface { %[1]sLexer Reduced(rule, state int, lval *%[1]sSymType) bool } func %[1]sSymName(c int) (s string) { x, ok := %[1]sXLAT[c] if ok { return %[1]sSymNames[x] } return __yyfmt__.Sprintf("%%d", c) } func %[1]slex1(yylex %[1]sLexer, lval *%[1]sSymType) (n int) { n = yylex.Lex(lval) if n <= 0 { n = %[1]sEOFCode } if %[1]sDebug >= 3 { __yyfmt__.Printf("\nlex %%s(%%#x %%d), %[4]s: %[3]s\n", %[1]sSymName(n), n, n, %[4]s) } return n } func %[1]sParse(yylex %[1]sLexer, parser *Parser) int { const yyError = %[2]d yyEx, _ := yylex.(%[1]sLexerEx) var yyn int parser.yylval = %[1]sSymType{} parser.yyVAL = %[1]sSymType{} yyS := parser.cache Nerrs := 0 /* number of errors */ Errflag := 0 /* error recovery flag */ yyerrok := func() { if %[1]sDebug >= 2 { __yyfmt__.Printf("yyerrok()\n") } Errflag = 0 } _ = yyerrok yystate := 0 yychar := -1 var yyxchar int var yyshift int yyp := -1 goto yystack ret0: return 0 ret1: return 1 yystack: /* put a state and value onto the stack */ yyp++ if yyp >= len(yyS) { nyys := make([]%[1]sSymType, len(yyS)*2) copy(nyys, yyS) yyS = nyys parser.cache = yyS } yyS[yyp] = parser.yyVAL yyS[yyp].yys = yystate yynewstate: if yychar < 0 { yychar = %[1]slex1(yylex, &parser.yylval) var ok bool if yyxchar, ok = %[1]sXLAT[yychar]; !ok { yyxchar = len(%[1]sSymNames) // > tab width } } if %[1]sDebug >= 4 { var a []int for _, v := range yyS[:yyp+1] { a = append(a, v.yys) } __yyfmt__.Printf("state stack %%v\n", a) } row := %[1]sParseTab[yystate] yyn = 0 if yyxchar < len(row) { if yyn = int(row[yyxchar]); yyn != 0 { yyn += %[1]sTabOfs } } switch { case yyn > 0: // shift yychar = -1 parser.yyVAL = parser.yylval yystate = yyn yyshift = yyn if %[1]sDebug >= 2 { __yyfmt__.Printf("shift, and goto state %%d\n", yystate) } if Errflag > 0 { Errflag-- } goto yystack case yyn < 0: // reduce case yystate == 1: // accept if %[1]sDebug >= 2 { __yyfmt__.Println("accept") } goto ret0 } if yyn == 0 { /* error ... attempt to resume parsing */ switch Errflag { case 0: /* brand new error */ if %[1]sDebug >= 1 { __yyfmt__.Printf("no action for %%s in state %%d\n", %[1]sSymName(yychar), yystate) } msg, ok := %[1]sXErrors[%[1]sXError{yystate, yyxchar}] if !ok { msg, ok = %[1]sXErrors[%[1]sXError{yystate, -1}] } if !ok && yyshift != 0 { msg, ok = %[1]sXErrors[%[1]sXError{yyshift, yyxchar}] } if !ok { msg, ok = %[1]sXErrors[%[1]sXError{yyshift, -1}] } if !ok || msg == "" { msg = "syntax error" } // ignore goyacc error message yylex.AppendError(yylex.Errorf("")) Nerrs++ fallthrough case 1, 2: /* incompletely recovered error ... try again */ Errflag = 3 /* find a state where "error" is a legal shift action */ for yyp >= 0 { row := %[1]sParseTab[yyS[yyp].yys] if yyError < len(row) { yyn = int(row[yyError])+%[1]sTabOfs if yyn > 0 { // hit if %[1]sDebug >= 2 { __yyfmt__.Printf("error recovery found error shift in state %%d\n", yyS[yyp].yys) } yystate = yyn /* simulate a shift of "error" */ goto yystack } } /* the current p has no shift on "error", pop stack */ if %[1]sDebug >= 2 { __yyfmt__.Printf("error recovery pops state %%d\n", yyS[yyp].yys) } yyp-- } /* there is no state on the stack with an error shift ... abort */ if %[1]sDebug >= 2 { __yyfmt__.Printf("error recovery failed\n") } goto ret1 case 3: /* no shift yet; clobber input char */ if %[1]sDebug >= 2 { __yyfmt__.Printf("error recovery discards %%s\n", %[1]sSymName(yychar)) } if yychar == %[1]sEOFCode { goto ret1 } yychar = -1 goto yynewstate /* try again in the same state */ } } r := -yyn x0 := %[1]sReductions[r] x, n := x0.xsym, x0.components yypt := yyp _ = yypt // guard against "declared and not used" yyp -= n if yyp+1 >= len(yyS) { nyys := make([]%[1]sSymType, len(yyS)*2) copy(nyys, yyS) yyS = nyys parser.cache = yyS } parser.yyVAL = yyS[yyp+1] /* consult goto table to find next state */ exState := yystate yystate = int(%[1]sParseTab[yyS[yyp].yys][x])+%[1]sTabOfs /* reduction by production r */ if %[1]sDebug >= 2 { __yyfmt__.Printf("reduce using rule %%v (%%s), and goto state %%d\n", r, %[1]sSymNames[x], yystate) } switch r {%i `, *oPref, errSym, *oDlvalf, *oDlval) for r, rule := range p.Rules { if rule.Action == nil { continue } action := rule.Action.Values if len(action) == 0 { continue } if len(action) == 1 { part := action[0] if part.Type == parser.ActionValueGo { src := part.Src src = src[1 : len(src)-1] // Remove lead '{' and trail '}' if strings.TrimSpace(src) == "" { continue } } } components := rule.Components typ := rule.Sym.Type max := len(components) if p1 := rule.Parent; p1 != nil { max = rule.MaxParentDlr components = p1.Components } mustFormat(f, "case %d: ", r) for _, part := range action { num := part.Num switch part.Type { case parser.ActionValueGo: mustFormat(f, "%s", part.Src) case parser.ActionValueDlrDlr: mustFormat(f, "parser.yyVAL.%s", typ) if typ == "" { panic("internal error 002") } case parser.ActionValueDlrNum: typ := p.Syms[components[num-1]].Type if typ == "" { panic("internal error 003") } mustFormat(f, "yyS[yypt-%d].%s", max-num, typ) case parser.ActionValueDlrTagDlr: mustFormat(f, "parser.yyVAL.%s", part.Tag) case parser.ActionValueDlrTagNum: mustFormat(f, "yyS[yypt-%d].%s", max-num, part.Tag) } } mustFormat(f, "\n") } mustFormat(f, `%u } if yyEx != nil && yyEx.Reduced(r, exState, &parser.yyVAL) { return -1 } goto yystack /* stack new state and value */ } %[2]s `, *oPref, p.Tail) _ = oNoLines //TODO Ignored for now return nil } func injectImport(src string) string { const inj = ` import __yyfmt__ "fmt" ` fset := token.NewFileSet() file := fset.AddFile("", -1, len(src)) var s scanner.Scanner s.Init( file, []byte(src), nil, scanner.ScanComments, ) for { switch _, tok, _ := s.Scan(); tok { case token.EOF: return inj + src case token.PACKAGE: s.Scan() // ident pos, _, _ := s.Scan() ofs := file.Offset(pos) return src[:ofs] + inj + src[ofs:] } } } func mustFormat(f strutil.Formatter, format string, args ...interface{}) { _, err := f.Format(format, args...) if err != nil { log.Fatalf("format error %v", err) } }