// opp.go

package gopapageno

import (
	"context"
	"fmt"
)

// OPParser is a parser that distributes the input among a set of workers and then merges
// the partial stacks they produce through one or more reduction passes (see Parse).
type OPParser struct {
	// g is the grammar the workers consult for precedence relations, rule matching and
	// semantic actions.
	g *Grammar

	// concurrency is the number of workers taking part in the current pass. It is
	// initialised from RunOptions.Concurrency and rewritten by Parse while it runs.
	concurrency int
	// reductionStrategy selects how Parse merges partial results: ReductionSweep and
	// ReductionMixed may use a single final sweep, any other value reduces pairwise.
	reductionStrategy ReductionStrategy

	// pools groups the memory pools preallocated by NewOPParser.
	pools struct {
		// stacks holds one pool per worker; Parse hands it to NewOPPStack to build
		// that worker's parsing stack.
		stacks []*Pool[stack[*Token]]
		// nonterminals holds one pool per worker; reductions take the tokens for newly
		// created nonterminals from it.
		nonterminals []*Pool[Token]

		// These are only used when reducing using a single sweep; NewOPParser leaves them
		// nil unless more than one worker is requested with a sweeping strategy.
		// sweepInput backs the token list built by CombineSweepLOS.
		sweepInput *Pool[stack[Token]]
		// NOTE(review): sweepStack is allocated by NewOPParser, but no use of it is
		// visible in this file — confirm where it is consumed.
		sweepStack *Pool[stack[*Token]]
	}

	// workers holds the workers created by NewOPParser, indexed by worker id.
	workers []*oppWorker
	// results receives the stacks gathered after each pass; once Parse completes,
	// results[0] is the stack holding the root of the parse.
	results []*OPPStack
}

// NewOPParser builds an OPParser for grammar g, sizing its memory pools from src and opts.
//
// A stack pool, a nonterminal pool and a worker are created for every unit of
// opts.Concurrency. The two pools used by the single-sweep reduction are only allocated
// when more than one worker is requested and opts.ReductionStrategy is ReductionSweep or
// ReductionMixed.
func NewOPParser(g *Grammar, src []byte, opts *RunOptions) *OPParser {
	workerCount := opts.Concurrency

	parser := &OPParser{
		g:                 g,
		concurrency:       workerCount,
		reductionStrategy: opts.ReductionStrategy,
		workers:           make([]*oppWorker, workerCount),
		results:           make([]*OPPStack, workerCount),
	}

	// Pool sizes are estimates derived from the source length, the average token length
	// and the number of workers. The nonterminal estimate is the (integer) number of
	// tokens per worker scaled by 1.5.
	stackPoolSize := stacksCount[*Token](src, workerCount, opts.AvgTokenLength)
	tokensPerWorker := len(src) / opts.AvgTokenLength / workerCount
	ntPoolSize := int(float64(tokensPerWorker) * 1.5)

	parser.pools.stacks = make([]*Pool[stack[*Token]], workerCount)
	parser.pools.nonterminals = make([]*Pool[Token], workerCount)

	// Give every worker its own stack pool and its own pool of reduction tokens.
	for id := range parser.workers {
		parser.pools.stacks[id] = NewPool(stackPoolSize, WithConstructor(newStack[*Token]))
		parser.pools.nonterminals[id] = NewPool[Token](ntPoolSize)

		parser.workers[id] = &oppWorker{
			parser: parser,
			id:     id,
			ntPool: parser.pools.nonterminals[id],
		}
	}

	// The sweep pools are only needed when several partial results may be merged by a
	// single final pass.
	sweeps := parser.reductionStrategy == ReductionSweep || parser.reductionStrategy == ReductionMixed
	if workerCount > 1 && sweeps {
		sweepInputSize := stacksCount[Token](src, workerCount, opts.AvgTokenLength)

		parser.pools.sweepInput = NewPool(sweepInputSize, WithConstructor(newStack[Token]))
		parser.pools.sweepStack = NewPool(stackPoolSize, WithConstructor(newStack[*Token]))
	}

	return parser
}

// oppWorker is one of the parsing workers owned by an OPParser. Parse runs its parse
// method in a dedicated goroutine.
type oppWorker struct {
	// parser points back to the owning OPParser; the worker reads its grammar and its
	// current concurrency level from it.
	parser *OPParser

	// id is the index of the worker. During the first pass, worker 0 opens its stack with
	// a delimiter and the worker whose id is concurrency-1 closes its input with one.
	id int
	// ntPool is the pool from which the tokens for reduced nonterminals are taken.
	ntPool *Pool[Token]
}

// Parse runs the parallel operator-precedence parse over tokensLists and returns the root
// of the resulting syntax tree.
//
// Every element of tokensLists is parsed by its own worker goroutine; each worker except
// the last also receives the first token of the following list as lookahead. The partial
// stacks are then merged by the reduction phase, which depends on the reduction strategy:
//   - ReductionSweep joins all partial results and finishes with a single worker;
//   - ReductionMixed does the same after two pairwise passes;
//   - any other strategy combines neighbouring results pairwise, using one worker less at
//     every pass, until a single stack is left.
//
// An error is returned when tokensLists is empty, when it holds more lists than the parser
// has workers, when a worker reports a failure, or when no root nonterminal can be
// extracted from the final stack.
//
// Parse overwrites p.concurrency while it runs, so a parser must not be shared by
// concurrent calls.
func (p *OPParser) Parse(ctx context.Context, tokensLists []*LOS[Token]) (*Token, error) {
	chunks := len(tokensLists)

	// Reject inputs that would otherwise index past the per-worker slices or read a stale
	// (or nil) entry of p.results.
	if chunks == 0 {
		return nil, fmt.Errorf("no token lists to parse")
	}
	if chunks > len(p.workers) {
		return nil, fmt.Errorf("cannot parse %d token lists with %d workers", chunks, len(p.workers))
	}

	// The deferred cancel covers every return path below.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	p.concurrency = chunks

	// No pass ever runs more than `chunks` workers and each worker sends at most one
	// message, so with these capacities a send can never block: workers still running when
	// Parse gives up after a failure terminate instead of leaking.
	resultCh := make(chan parseResult[OPPStack], chunks)
	errCh := make(chan error, chunks)

	// First parallel pass of the algorithm.
	for thread := 0; thread < p.concurrency; thread++ {
		var nextToken *Token

		// If the thread is not the last, also take the first token of the next list for lookahead.
		if thread < p.concurrency-1 {
			nextInputListIter := tokensLists[thread+1].HeadIterator()
			nextToken = nextInputListIter.Next()
		}

		s := NewOPPStack(p.pools.stacks[thread])
		go p.workers[thread].parse(ctx, s, tokensLists[thread], nextToken, false, resultCh, errCh)
	}

	if err := collectResults[OPPStack](p.results, resultCh, errCh, p.concurrency); err != nil {
		return nil, err
	}

	// If the number of threads is greater than one, results must be combined and work should continue.
	reductionPasses := 0

	// Reduction phase: every iteration runs with one worker less than the previous pass.
	for p.concurrency--; p.concurrency >= 1; p.concurrency-- {
		if p.reductionStrategy == ReductionSweep || (p.reductionStrategy == ReductionMixed && reductionPasses >= 2) {
			// Undo the decrement above: CombineSweepLOS derives the number of stacks to
			// join from p.concurrency.
			p.concurrency++

			// Create the final input by joining together the stacks from the previous step.
			stack := p.results[0].Combine()
			input := p.CombineSweepLOS(p.pools.sweepInput, p.results[1:])

			// A single worker performs the final sweep; this also ends the loop after
			// this iteration.
			p.concurrency = 1

			go p.workers[0].parse(ctx, stack, input, nil, true, resultCh, errCh)

			if err := collectResults[OPPStack](p.results, resultCh, errCh, 1); err != nil {
				return nil, err
			}
		} else {
			for i := 0; i < p.concurrency; i++ {
				stackLeft := p.results[i]
				stackRight := p.results[i+1]

				stack := stackLeft.Combine()

				// TODO: I should find a way to make this work without creating a new LOS for the inputs.
				// Unfortunately the new stack depends on the content of tokensLists[i] since its elements are stored there.
				// We can't erase the old input easily to reuse its storage.
				// TODO: Maybe allocate 2 * c LOS so that we can alternate?
				input := stackRight.CombineLOS(tokensLists[i].pool)

				go p.workers[i].parse(ctx, stack, input, nil, true, resultCh, errCh)
			}

			if err := collectResults[OPPStack](p.results, resultCh, errCh, p.concurrency); err != nil {
				return nil, err
			}

			reductionPasses++
		}
	}

	root, err := p.results[0].LastNonterminal()
	if err != nil {
		return nil, err
	}

	return root, nil
}

// CombineSweepLOS builds the input for the final sweep: a new list, backed by pool, holding
// the tokens of the first p.concurrency-1 entries of stacks in order. The first token of
// every stack is skipped.
func (p *OPParser) CombineSweepLOS(pool *Pool[stack[Token]], stacks []*OPPStack) *LOS[Token] {
	combined := NewLOS[Token](pool)

	stackCount := p.concurrency - 1
	for idx := 0; idx < stackCount; idx++ {
		it := stacks[idx].HeadIterator()

		// Discard the first token of the stack.
		it.Next()

		// Copy every remaining token into the combined list.
		for {
			tok := it.Next()
			if tok == nil {
				break
			}

			combined.Push(*tok)
		}
	}

	return combined
}

// parse implements both OPP and AOPP strategies.
//
// It reads tokens in order. Nonterminals are shifted onto stack with no precedence. For a
// terminal, the precedence between the first terminal of the stack and the token decides
// what happens: PrecEquals and PrecYields shift the token; PrecTakes and PrecAssociative
// reduce the handle on top of the stack, or shift when the stack holds no yielding token;
// any other relation aborts the parse.
//
// When finalPass is false the worker first delimits its chunk. Worker 0 pushes a #
// delimiter onto the stack, every other worker pushes the first token of its own input.
// The last worker appends a # delimiter to tokens, every other worker appends nextToken
// (when not nil) as lookahead.
//
// On success the stack is sent on resultCh tagged with the worker id; on failure an error
// is reported on errCh instead. The worker stops with ctx.Err() as soon as ctx is
// cancelled, and none of its channel sends blocks past that point.
func (w *oppWorker) parse(ctx context.Context, stack *OPPStack, tokens *LOS[Token], nextToken *Token, finalPass bool, resultCh chan<- parseResult[OPPStack], errCh chan<- error) {
	tokensIt := tokens.HeadIterator()
	done := ctx.Done()

	if !finalPass {
		// If the thread is the first, push a # onto the stack.
		// Otherwise, push the first token of the input onto the stack.
		if w.id == 0 {
			stack.Push(&Token{Type: TokenTerm, Precedence: PrecEmpty})
		} else {
			first := tokensIt.Next()
			if first == nil {
				// An empty chunk has no token to open the stack with.
				w.reportError(ctx, errCh, fmt.Errorf("worker %d received an empty token list", w.id))
				return
			}

			first.Precedence = PrecEmpty
			stack.Push(first)
		}

		// If the thread is the last, append a # to the input.
		// Otherwise, append the first token of the next input as lookahead.
		if w.id == w.parser.concurrency-1 {
			tokens.Push(Token{Type: TokenTerm, Precedence: PrecEmpty})
		} else if nextToken != nil {
			tokens.Push(*nextToken)
		}
	}

	// Scratch buffers for the right-hand side being reduced; they are filled from the end.
	rhsBuf := make([]TokenType, w.parser.g.MaxRHSLength)
	rhsTokensBuf := make([]*Token, w.parser.g.MaxRHSLength)

	// Template copied over every token taken from the pool, so that all of its fields are
	// reset before the semantic action sees it.
	newNonTerm := Token{Type: TokenEmpty, Precedence: PrecEmpty}

	for inputToken := tokensIt.Next(); inputToken != nil; {
		// Stop promptly once the parse has been cancelled.
		select {
		case <-done:
			w.reportError(ctx, errCh, ctx.Err())
			return
		default:
		}

		// If the current token is a nonterminal, push it onto the stack with no precedence relation.
		if !inputToken.Type.IsTerminal() {
			inputToken.Precedence = PrecEmpty
			stack.Push(inputToken)

			inputToken = tokensIt.Next()
			continue
		}

		// Find the first terminal on the stack and get the precedence between it and the current token.
		// With no terminal on the stack, TokenTerm is used in its place.
		firstTerminal := stack.FirstTerminal()

		var prec Precedence
		if firstTerminal == nil {
			prec = w.parser.g.precedence(TokenTerm, inputToken.Type)
		} else {
			prec = w.parser.g.precedence(firstTerminal.Type, inputToken.Type)
		}

		switch prec {
		case PrecEquals, PrecYields:
			// Shift the token together with its precedence relation.
			inputToken.Precedence = prec
			stack.Push(inputToken)

			inputToken = tokensIt.Next()

		case PrecTakes, PrecAssociative:
			// If there are no tokens yielding precedence on the stack, shift the token.
			if stack.YieldingPrecedence() == 0 {
				inputToken.Precedence = prec
				stack.Push(inputToken)

				inputToken = tokensIt.Next()
				continue
			}

			// Otherwise reduce; the input token is examined again afterwards.
			pos, ok := w.popHandle(stack, rhsBuf, rhsTokensBuf)
			if !ok {
				w.reportError(ctx, errCh, fmt.Errorf("handle is longer than the longest rule right-hand side (%d symbols)", len(rhsBuf)))
				return
			}

			// Obtain the actual rhs from the buffers.
			rhs := rhsBuf[pos:]
			rhsTokens := rhsTokensBuf[pos:]

			// Find corresponding lhs and ruleNum.
			lhs, ruleNum := w.parser.g.findRuleMatch(rhs)
			if lhs == TokenEmpty {
				w.reportError(ctx, errCh, fmt.Errorf("could not find match for rhs %v", rhs))
				return
			}

			newNonTerm.Type = lhs
			lhsToken := w.ntPool.Get()
			*lhsToken = newNonTerm

			// Execute the semantic action.
			w.parser.g.Func(ruleNum, RuleSimple, lhsToken, rhsTokens, w.id)

			// Push the new nonterminal onto the stack.
			stack.Push(lhsToken)

		default:
			// If there's no precedence relation, abort the parsing.
			w.reportError(ctx, errCh, fmt.Errorf("no precedence relation found"))
			return
		}
	}

	select {
	case resultCh <- parseResult[OPPStack]{w.id, stack}:
	case <-done:
		// Nobody is going to collect the result any more.
		w.reportError(ctx, errCh, ctx.Err())
	}
}

// popHandle pops the handle on top of stack into the tail of rhsBuf and rhsTokensBuf and
// returns the index at which it starts. It reports false when the handle does not fit in
// the buffers, i.e. when it is longer than any right-hand side they were sized for.
//
// The caller must have checked that the stack holds a token with yielding precedence.
func (w *oppWorker) popHandle(stack *OPPStack, rhsBuf []TokenType, rhsTokensBuf []*Token) (int, bool) {
	pos := len(rhsBuf) - 1

	// Pop tokens from the stack until one that yields precedence is reached, saving them in the buffers.
	var token *Token
	for token = stack.Pop(); token.Precedence != PrecYields && token.Precedence != PrecAssociative; token = stack.Pop() {
		if pos < 0 {
			return 0, false
		}

		rhsTokensBuf[pos] = token
		rhsBuf[pos] = token.Type
		pos--
	}

	// The token that opens the handle belongs to it as well.
	if pos < 0 {
		return 0, false
	}

	rhsTokensBuf[pos] = token
	rhsBuf[pos] = token.Type

	// Pop one last token: a nonterminal is part of the handle, a terminal is pushed back.
	token = stack.Pop()
	if token.Type.IsTerminal() {
		stack.Push(token)
		return pos, true
	}

	pos--
	if pos < 0 {
		return 0, false
	}

	rhsTokensBuf[pos] = token
	rhsBuf[pos] = token.Type

	stack.UpdateFirstTerminal()

	return pos, true
}

// reportError hands err to the coordinator through errCh without risking a goroutine leak:
// it first tries an immediate send and otherwise waits only until ctx is cancelled.
func (w *oppWorker) reportError(ctx context.Context, errCh chan<- error, err error) {
	// Try first without looking at ctx, so that a cancellation error is still delivered
	// whenever the channel can take it.
	select {
	case errCh <- err:
		return
	default:
	}

	select {
	case errCh <- err:
	case <-ctx.Done():
	}
}