From 1b6ef1e43e1ec1107ce29a6438b399352d09fbc2 Mon Sep 17 00:00:00 2001 From: Mel Date: Mon, 11 Jul 2022 00:35:59 +0200 Subject: Rebuild compiler and code builder with markers --- pkg/lang/compiler/compiler.go | 231 +++++++++++++++++++++------------------ pkg/lang/compiler/scope_chain.go | 226 ++++++++++++++++++++++++++++++-------- pkg/lang/compiler/symbol.go | 31 +++++- 3 files changed, 334 insertions(+), 154 deletions(-) (limited to 'pkg/lang/compiler') diff --git a/pkg/lang/compiler/compiler.go b/pkg/lang/compiler/compiler.go index 116db1c..a31ade7 100644 --- a/pkg/lang/compiler/compiler.go +++ b/pkg/lang/compiler/compiler.go @@ -31,7 +31,7 @@ func (comp *Compiler) Compile() (code.Code, error) { target.AppendOp(code.OpHalt) - return target.Build(), nil + return target.Build() } func (comp *Compiler) compileStmt(t *code.Builder, stmt ast.Stmt) error { @@ -39,6 +39,12 @@ func (comp *Compiler) compileStmt(t *code.Builder, stmt ast.Stmt) error { switch stmt.Kind { case ast.StmtKindEmpty: // Do nothing. 
+ case ast.StmtKindUse: + panic("use statements not implemented") + case ast.StmtKindFnDecl: + panic("function declaration statements not implemented") + case ast.StmtKindObjectDecl: + panic("object declaration statements not implemented") case ast.StmtKindVarDecl: decl := stmt.Value.(ast.StmtVarDecl) err = comp.compileVarDeclStmt(t, decl) @@ -51,11 +57,21 @@ func (comp *Compiler) compileStmt(t *code.Builder, stmt ast.Stmt) error { case ast.StmtKindForIn: forCondIn := stmt.Value.(ast.StmtForIn) err = comp.compileForInStmt(t, forCondIn) + case ast.StmtKindTry: + panic("try statements not implemented") + case ast.StmtKindReturn: + panic("return statements not implemented") + case ast.StmtKindContinue: + panic("continue statements not implemented") + case ast.StmtKindBreak: + panic("break statements not implemented") + case ast.StmtKindThrow: + panic("throw statements not implemented") case ast.StmtKindExpr: expr := stmt.Value.(ast.StmtExpr).Value err = comp.compileExpr(t, expr) default: - panic(fmt.Errorf("statement of kind %v not implemented", stmt.Kind)) + panic(fmt.Errorf("unknown statement kind: %d", stmt.Kind)) } return err @@ -90,54 +106,56 @@ func (comp *Compiler) compileIfStmt(t *code.Builder, ifStmt ast.StmtIf) error { // preventing other CondNodes from running. This is missing from the last CondNode. // Example: `jmp @end` - subUnits := make([]code.Builder, 0, len(ifStmt.Conds)) + // First we create all the markers we'll need for the if statement + parentMarker := comp.scopes.CreateAnonymousFunctionSubUnit() - totalLength := 0 + endMarker := parentMarker.SubMarker("end") + + condMarkers := make([]code.Marker, 0, len(ifStmt.Conds)-1) // We don't need a marker for the first CondNode. 
+ for i := 0; i < len(ifStmt.Conds)-1; i++ { + condMarker := parentMarker.SubMarker("cond_%d", i+1) + condMarkers = append(condMarkers, condMarker) + } for i, cond := range ifStmt.Conds { - // Then block - thenTarget := code.NewBuilder() - if err := comp.compileBlockNode(&thenTarget, cond.Then); err != nil { - return err - } + isFirst := i == 0 + isLast := i == len(ifStmt.Conds)-1 - totalLength += thenTarget.Len() - if i != len(ifStmt.Conds)-1 { - totalLength += lengthOfAJumpInstruction + // If we aren't in the first CondNode, the node before it needs a marker to here. + if !isFirst { + marker := condMarkers[i-1] + t.PutMarker(marker) } - // Condition check - conditionTarget := code.NewBuilder() if !cond.Cond.IsEmpty() { - if err := comp.compileExpr(&conditionTarget, cond.Cond); err != nil { + // Condition check + if err := comp.compileExpr(t, cond.Cond); err != nil { return err } - totalLength += conditionTarget.Len() + lengthOfAJumpInstruction // condjmp - - conditionTarget.AppendOp(code.OpJf) // Condition jump - conditionTarget.AppendReferenceToPc(int64(totalLength)) + t.AppendOp(code.OpJf) + if isLast { + t.AppendMarkerReference(endMarker) + } else { + nextCondMarker := condMarkers[i] + t.AppendMarkerReference(nextCondMarker) + } } - subUnit := conditionTarget - subUnit.AppendBuilder(thenTarget) - subUnits = append(subUnits, subUnit) - } - - result := code.NewBuilder() - - // Then jumps - for i, subUnit := range subUnits { - if i != len(ifStmt.Conds)-1 { - subUnit.AppendOp(code.OpJmp) - subUnit.AppendReferenceToPc(int64(totalLength)) + // Then block + if err := comp.compileBlockNode(t, cond.Then); err != nil { + return err } - result.AppendBuilderWithoutAdjustingReferences(subUnit) + // Then jump + if !isLast { + t.AppendOp(code.OpJmp) + t.AppendMarkerReference(endMarker) + } } - t.AppendBuilder(result) + t.PutMarker(endMarker) return nil } @@ -149,34 +167,34 @@ func (comp *Compiler) compileForCondStmt(t *code.Builder, forCondStmt ast.StmtFo // 3. 
Do block: Does something // 4. Repeat jump: Jumps back to start - // Do block - doTarget := code.NewBuilder() - if err := comp.compileBlockNode(&doTarget, forCondStmt.Do); err != nil { - return err - } + parentMarker := comp.scopes.CreateAnonymousFunctionSubUnit() + + startMarker := parentMarker.SubMarker("start") + endMarker := parentMarker.SubMarker("end") + + t.PutMarker(startMarker) - conditionTarget := code.NewBuilder() if !forCondStmt.Cond.IsEmpty() { // Condition check - if err := comp.compileExpr(&conditionTarget, forCondStmt.Cond); err != nil { + if err := comp.compileExpr(t, forCondStmt.Cond); err != nil { return err } - endOfFor := conditionTarget.Len() + doTarget.Len() + lengthOfAJumpInstruction*2 - - // Condition jump - conditionTarget.AppendOp(code.OpJf) - conditionTarget.AppendReferenceToPc(int64(endOfFor)) + // Condition jump + t.AppendOp(code.OpJf) + t.AppendMarkerReference(endMarker) } - subUnit := conditionTarget - subUnit.AppendBuilder(doTarget) + // Do block + if err := comp.compileBlockNode(t, forCondStmt.Do); err != nil { + return err + } // Repeat jump - subUnit.AppendOp(code.OpJmp) - subUnit.AppendReferenceToPc(int64(0)) // Start of the for + t.AppendOp(code.OpJmp) + t.AppendMarkerReference(startMarker) - t.AppendBuilder(subUnit) + t.PutMarker(endMarker) return nil } @@ -216,88 +234,81 @@ func (comp *Compiler) compileForInStmt(t *code.Builder, forInStmt ast.StmtForIn) // @end: // halt + parentMarker := comp.scopes.CreateAnonymousFunctionSubUnit() + + checkMarker := parentMarker.SubMarker("check") + endMarker := parentMarker.SubMarker("end") + // Preparation - preparationTarget := code.NewBuilder() - if err := comp.compileExpr(&preparationTarget, forInStmt.Collection); err != nil { + if err := comp.compileExpr(t, forInStmt.Collection); err != nil { return err } collectionLocal := comp.scopes.DeclareAnonymous() - preparationTarget.AppendOp(code.OpPushInt) - preparationTarget.AppendInt(0) + t.AppendOp(code.OpPushInt) + t.AppendInt(0) iLocal := 
comp.scopes.DeclareAnonymous() - preparationTarget.AppendOp(code.OpPushNull) + t.AppendOp(code.OpPushNull) nameLocal, ok := comp.scopes.Declare(forInStmt.Name.Value) if !ok { return fmt.Errorf("variable %s already declared", forInStmt.Name.Value) } // Condition check - conditionTarget := code.NewBuilder() + t.PutMarker(checkMarker) - conditionTarget.AppendOp(code.OpGetLocal) - conditionTarget.AppendInt(int64(iLocal)) + t.AppendOp(code.OpGetLocal) + t.AppendInt(int64(iLocal)) - conditionTarget.AppendOp(code.OpGetLocal) - conditionTarget.AppendInt(int64(collectionLocal)) + t.AppendOp(code.OpGetLocal) + t.AppendInt(int64(collectionLocal)) - conditionTarget.AppendOp(code.OpGetMember) - conditionTarget.AppendString("length") + t.AppendOp(code.OpGetMember) + t.AppendString("length") - conditionTarget.AppendOp(code.OpCall) - conditionTarget.AppendInt(0) + t.AppendOp(code.OpCall) + t.AppendInt(0) - conditionTarget.AppendOp(code.OpLt) + t.AppendOp(code.OpLt) - // Do Preparation - doPreparationTarget := code.NewBuilder() + // Condition jump + t.AppendOp(code.OpJf) + t.AppendMarkerReference(endMarker) - doPreparationTarget.AppendOp(code.OpGetLocal) - doPreparationTarget.AppendInt(int64(collectionLocal)) + // Do Preparation + t.AppendOp(code.OpGetLocal) + t.AppendInt(int64(collectionLocal)) - doPreparationTarget.AppendOp(code.OpGetLocal) - doPreparationTarget.AppendInt(int64(iLocal)) + t.AppendOp(code.OpGetLocal) + t.AppendInt(int64(iLocal)) - doPreparationTarget.AppendOp(code.OpIndex) + t.AppendOp(code.OpIndex) - doPreparationTarget.AppendOp(code.OpSetLocal) - doPreparationTarget.AppendInt(int64(nameLocal)) + t.AppendOp(code.OpSetLocal) + t.AppendInt(int64(nameLocal)) - doPreparationTarget.AppendOp(code.OpGetLocal) - doPreparationTarget.AppendInt(int64(iLocal)) + t.AppendOp(code.OpGetLocal) + t.AppendInt(int64(iLocal)) - doPreparationTarget.AppendOp(code.OpPushInt) - doPreparationTarget.AppendInt(1) + t.AppendOp(code.OpPushInt) + t.AppendInt(1) - 
doPreparationTarget.AppendOp(code.OpAdd) + t.AppendOp(code.OpAdd) - doPreparationTarget.AppendOp(code.OpSetLocal) - doPreparationTarget.AppendInt(int64(iLocal)) + t.AppendOp(code.OpSetLocal) + t.AppendInt(int64(iLocal)) // Do block - doTarget := code.NewBuilder() - if err := comp.compileBlockNode(&doTarget, forInStmt.Do); err != nil { + if err := comp.compileBlockNode(t, forInStmt.Do); err != nil { return err } - // Condition Jump - - endOfFor := preparationTarget.Len() + conditionTarget.Len() + doPreparationTarget.Len() + doTarget.Len() + lengthOfAJumpInstruction*2 - - conditionTarget.AppendOp(code.OpJf) - conditionTarget.AppendReferenceToPc(int64(endOfFor)) - - subUnit := preparationTarget - subUnit.AppendBuilderWithoutAdjustingReferences(conditionTarget) - subUnit.AppendBuilder(doPreparationTarget) - subUnit.AppendBuilder(doTarget) - // Repeat jump - subUnit.AppendOp(code.OpJmp) - subUnit.AppendReferenceToPc(int64(preparationTarget.Len())) + t.AppendOp(code.OpJmp) + t.AppendMarkerReference(checkMarker) - t.AppendBuilder(subUnit) + t.PutMarker(endMarker) return nil } @@ -389,17 +400,23 @@ func (comp *Compiler) compileAssignExpr(t *code.Builder, expr ast.ExprBinary) er } name := expr.Left.Value.(ast.ExprIdent).Value.Value - symbol, ok := comp.scopes.Lookup(name) + symbolId, ok := comp.scopes.Lookup(name) if !ok { return fmt.Errorf("variable %s not declared", name) } + if symbolId.symbolKind != SymbolKindVariable { + return fmt.Errorf("can't assign to a %v", symbolId.symbolKind) + } + + symbol := comp.scopes.GetVariable(symbolId) + if err := comp.compileExpr(t, expr.Right); err != nil { return err } t.AppendOp(code.OpSetLocal) - t.AppendInt(int64(symbol.localIndex)) + t.AppendInt(int64(symbol.data.localIndex)) return nil } @@ -478,14 +495,20 @@ func (comp *Compiler) compileArrayLitExpr(t *code.Builder, expr ast.ExprArrayLit } func (comp *Compiler) compileIdentExpr(t *code.Builder, expr ast.ExprIdent) error { - symbol, ok := comp.scopes.Lookup(expr.Value.Value) + 
symbolId, ok := comp.scopes.Lookup(expr.Value.Value) if !ok { return fmt.Errorf("undefined symbol %s", expr.Value.Value) } + if symbolId.symbolKind != SymbolKindVariable { + return fmt.Errorf("%v values are not implemeted yet", symbolId.symbolKind) + } + + symbol := comp.scopes.GetVariable(symbolId) + // TODO: Add boundries to check how the symbol should be fetched. (local, env, global, etc.) t.AppendOp(code.OpGetLocal) - t.AppendInt(int64(symbol.localIndex)) + t.AppendInt(int64(symbol.data.localIndex)) return nil } @@ -531,7 +554,3 @@ func (comp *Compiler) compileBlockNode(t *code.Builder, block ast.BlockNode) err return nil } - -const ( - lengthOfAJumpInstruction = 9 // The length of a jump Op (jmp, jf, jt) and it's following 64-bit integer. -) diff --git a/pkg/lang/compiler/scope_chain.go b/pkg/lang/compiler/scope_chain.go index 6b7e693..ad176da 100644 --- a/pkg/lang/compiler/scope_chain.go +++ b/pkg/lang/compiler/scope_chain.go @@ -1,102 +1,238 @@ package compiler +import ( + "fmt" + "jinx/pkg/lang/vm/code" +) + type ScopeID int type ScopeChain struct { - scopes []Scope + nameToSymbol map[string]SymbolID + scopes []Scope } func NewScopeChain() ScopeChain { scopes := make([]Scope, 1) - scopes[0] = Scope{ - kind: ScopeKindGlobal, - nameToSymbol: make(map[string]int), - symbols: make([]Symbol, 0), - } + scopes[0] = NewFunctionScope("") // Top-most scope is a function scope, so it can have sub-units return ScopeChain{ - scopes: scopes, + nameToSymbol: make(map[string]SymbolID), + scopes: scopes, } } +func (sc *ScopeChain) CurrentScopeID() ScopeID { + return ScopeID(len(sc.scopes) - 1) +} + func (sc *ScopeChain) Current() *Scope { - return &sc.scopes[len(sc.scopes)-1] + return &sc.scopes[sc.CurrentScopeID()] } -func (sc *ScopeChain) Enter(kind ScopeKind) { - sc.scopes = append(sc.scopes, Scope{ - kind: kind, - nameToSymbol: make(map[string]int), - symbols: make([]Symbol, 0), - }) +func (sc *ScopeChain) CurrentFunction() *Scope { + // TODO: Probably should make this 
lookup constant by making a separate array of function scopes + for i := len(sc.scopes) - 1; i >= 0; i-- { + if sc.scopes[i].kind == ScopeKindFunction { + return &sc.scopes[i] + } + } + + panic("top scope should always be a function scope") +} + +func (sc *ScopeChain) Enter() { + sc.scopes = append(sc.scopes, NewNormalScope()) +} + +func (sc *ScopeChain) EnterFunction(unitName string) { + sc.scopes = append(sc.scopes, NewFunctionScope(unitName)) } func (sc *ScopeChain) Exit() { - sc.scopes[len(sc.scopes)-1] = Scope{} - sc.scopes = sc.scopes[:len(sc.scopes)-1] + if sc.CurrentScopeID() == 0 { + return + } + + sc.scopes[sc.CurrentScopeID()] = Scope{} + sc.scopes = sc.scopes[:sc.CurrentScopeID()] } func (sc *ScopeChain) Declare(name string) (int, bool) { // Check whether the symbol is already declared in any of the scopes. - for _, scope := range sc.scopes { - if _, ok := scope.nameToSymbol[name]; ok { - return 0, false - } + if _, ok := sc.nameToSymbol[name]; ok { + return 0, false } current := sc.Current() - index := len(current.symbols) + indexInScope := len(current.variableSymbols) + + symbolID := SymbolID{ + symbolKind: SymbolKindVariable, + scopeID: sc.CurrentScopeID(), + indexInScope: indexInScope, + } // Declare the symbol in the current scope. 
- current.symbols = append(current.symbols, Symbol{ - kind: SymbolKindVariable, - name: name, - localIndex: index, + current.variableSymbols = append(current.variableSymbols, Symbol[SymbolVariable]{ + name: name, + data: SymbolVariable{ + localIndex: indexInScope, + }, + }) + + sc.nameToSymbol[name] = symbolID + + return indexInScope, true +} + +func (sc *ScopeChain) DeclareFunction(name string) (code.Marker, bool) { + if _, ok := sc.nameToSymbol[name]; ok { + return "", false + } + + current := sc.Current() + index := len(current.functionSymbols) + + symbolID := SymbolID{ + symbolKind: SymbolKindFunction, + scopeID: sc.CurrentScopeID(), + indexInScope: index, + } + + unitName := sc.CreateFunctionSubUnit(name) + + current.functionSymbols = append(current.functionSymbols, Symbol[SymbolFunction]{ + name: name, + data: SymbolFunction{ + marker: unitName, + }, }) - current.nameToSymbol[name] = index + sc.nameToSymbol[name] = symbolID - return index, true + return unitName, true } func (sc *ScopeChain) DeclareAnonymous() int { current := sc.Current() - index := len(current.symbols) + index := len(current.variableSymbols) - // Declare the symbol in the current scope. - current.symbols = append(current.symbols, Symbol{ - kind: SymbolKindVariable, - name: "", - localIndex: index, + current.variableSymbols = append(current.variableSymbols, Symbol[SymbolVariable]{ + name: "", // An anonymous symbol has no name. 
+ data: SymbolVariable{ + localIndex: index, + }, }) return index } func (sc *ScopeChain) DeclareTemporary() int { - return len(sc.Current().symbols) + return len(sc.Current().variableSymbols) // :) } -func (sc *ScopeChain) Lookup(name string) (Symbol, bool) { - for i := len(sc.scopes) - 1; i >= 0; i-- { - if symbol, ok := sc.scopes[i].nameToSymbol[name]; ok { - return sc.scopes[i].symbols[symbol], true - } +func (sc *ScopeChain) CreateAnonymousFunctionSubUnit() code.Marker { + fnScope := sc.CurrentFunction() + data := fnScope.data.(ScopeFunction) + + index := data.subUnitCount + data.subUnitCount++ + + fnScope.data = data + + return sc.CreateFunctionSubUnit(fmt.Sprintf("anon_%d", index)) +} + +func (sc *ScopeChain) CreateFunctionSubUnit(subUnitName string) code.Marker { + fnScope := sc.CurrentFunction() + data := fnScope.data.(ScopeFunction) + + name := data.unitName + if name == "" { + name = code.Marker(subUnitName) + } else { + name = name.SubUnit(subUnitName) + } + + return name +} + +func (sc *ScopeChain) Lookup(name string) (SymbolID, bool) { + if id, ok := sc.nameToSymbol[name]; ok { + return id, true + } + + return SymbolID{}, false +} + +func (sc *ScopeChain) GetVariable(id SymbolID) Symbol[SymbolVariable] { + if id.symbolKind != SymbolKindVariable { + panic("incorrect symbol id kind given") + } + + return sc.scopes[id.scopeID].variableSymbols[id.indexInScope] +} + +func (sc *ScopeChain) GetFunction(id SymbolID) Symbol[SymbolFunction] { + if id.symbolKind != SymbolKindFunction { + panic("incorrect symbol id kind given") } - return Symbol{}, false + return sc.scopes[id.scopeID].functionSymbols[id.indexInScope] +} + +type SymbolID struct { + symbolKind SymbolKind + scopeID ScopeID + indexInScope int } type ScopeKind int const ( - ScopeKindGlobal ScopeKind = iota + ScopeKindNormal ScopeKind = iota ScopeKindFunction - ScopeKindBlock + ScopeKindLoop ) type Scope struct { - kind ScopeKind - nameToSymbol map[string]int - symbols []Symbol + variableSymbols 
[]Symbol[SymbolVariable] + functionSymbols []Symbol[SymbolFunction] + + kind ScopeKind + data any +} + +func NewNormalScope() Scope { + return Scope{ + variableSymbols: make([]Symbol[SymbolVariable], 0), + functionSymbols: make([]Symbol[SymbolFunction], 0), + kind: ScopeKindNormal, + data: ScopeNormal{}, + } +} + +func NewFunctionScope(unitName string) Scope { + return Scope{ + variableSymbols: make([]Symbol[SymbolVariable], 0), + functionSymbols: make([]Symbol[SymbolFunction], 0), + kind: ScopeKindFunction, + data: ScopeFunction{ + unitName: code.Marker(unitName), + subUnitCount: 0, + }, + } +} + +type ScopeNormal struct{} + +type ScopeFunction struct { + unitName code.Marker + subUnitCount int +} + +type ScopeLoop struct { + breakMarker code.Marker + continueMarker code.Marker } diff --git a/pkg/lang/compiler/symbol.go b/pkg/lang/compiler/symbol.go index 03838da..d22cdc0 100644 --- a/pkg/lang/compiler/symbol.go +++ b/pkg/lang/compiler/symbol.go @@ -1,13 +1,38 @@ package compiler +import "jinx/pkg/lang/vm/code" + type SymbolKind int const ( SymbolKindVariable SymbolKind = iota + SymbolKindFunction ) -type Symbol struct { - kind SymbolKind - name string +func (s SymbolKind) String() string { + switch s { + case SymbolKindVariable: + return "variable" + case SymbolKindFunction: + return "function" + default: + panic("unknown symbol kind") + } +} + +type Symbol[D SymbolData] struct { + name string + data D +} + +type SymbolData interface { + SymbolVariable | SymbolFunction +} + +type SymbolVariable struct { localIndex int } + +type SymbolFunction struct { + marker code.Marker +} -- cgit 1.4.1