copp.go

package gopapageno

import (
	"context"
	"fmt"
)

// COPParser implements parsing using a simplified approach to C-OPGs.
//
// A parser owns one worker, one set of memory pools and one result slot per
// thread; all of them are allocated up front by NewCOPParser.
type COPParser struct {
	// g is the grammar consulted by the workers for precedences, rule
	// matching and semantic actions.
	g *Grammar

	// concurrency is the number of stacks/workers involved in the current
	// pass; Parse overwrites it and changes it while reducing.
	// reductionStrategy selects how Parse combines the partial results.
	concurrency       int
	reductionStrategy ReductionStrategy

	// Pools
	pools struct {
		// Per-thread pools, indexed by worker id: token stacks, tokens
		// generated by the reduction steps, and cyclic automata state stacks.
		stacks       []*Pool[stack[*Token]]
		nonterminals []*Pool[Token]
		stateStacks  []*Pool[stack[CyclicAutomataState]]

		// These are only used when reducing using a single sweep.
		// NewCOPParser allocates them only when concurrency is greater than 1
		// and the strategy is ReductionSweep or ReductionMixed; otherwise they
		// stay nil.
		sweepInput      *Pool[stack[Token]]
		sweepStack      *Pool[stack[*Token]]
		sweepStateStack *Pool[stack[CyclicAutomataState]]

		// Per-thread maps handed to the COPPStack of each worker on the first
		// pass.
		producedTokensMap []map[*Token]*Token
	}

	// workers holds one worker per thread; results holds the stack produced
	// by each worker, stored at the worker's index by collectResults.
	workers []*coppWorker
	results []*COPPStack
}

// NewCOPParser builds a COPParser for grammar g, allocating every pool and
// worker it needs so that the returned parser is ready to use.
//
// Pool sizes are derived from the length of src and from opts (Concurrency,
// AvgTokenLength, ParallelFactor). One stack pool, one nonterminal pool, one
// state-stack pool, one produced-tokens map and one worker are created for each
// of the opts.Concurrency threads. The extra pools for the final sweep are only
// created when more than one thread is used and the reduction strategy is
// ReductionSweep or ReductionMixed.
func NewCOPParser(g *Grammar, src []byte, opts *RunOptions) *COPParser {
	p := &COPParser{
		g:                 g,
		concurrency:       opts.Concurrency,
		reductionStrategy: opts.ReductionStrategy,
		workers:           make([]*coppWorker, opts.Concurrency),
		results:           make([]*COPPStack, opts.Concurrency),
	}

	stackPoolBaseSize := stacksCountFactored[*Token](src, opts)
	ntPoolBaseSize := len(src) / opts.AvgTokenLength / p.concurrency

	// Per-thread containers: token stacks, tokens produced by reductions,
	// cyclic state stacks and produced-tokens maps.
	p.pools.stacks = make([]*Pool[stack[*Token]], p.concurrency)
	p.pools.nonterminals = make([]*Pool[Token], p.concurrency)
	p.pools.stateStacks = make([]*Pool[stack[CyclicAutomataState]], p.concurrency)
	p.pools.producedTokensMap = make([]map[*Token]*Token, p.concurrency)

	// The stack length multiplier is only scaled down when the factored stack
	// count came out as zero.
	var stackMult float64 = 1.0
	if stackPoolBaseSize == 0 {
		stackMult -= 0.999 * opts.ParallelFactor
	}
	stackLen := stackLengthFor[*Token](stackMult)

	ntMultiplier := 1.0 - (0.6 * opts.ParallelFactor)
	ntPoolSize := int(float64(ntPoolBaseSize) * ntMultiplier)

	stackPoolSize := stackPoolBaseSize + 1

	for id := range p.concurrency {
		ntPool := NewPool[Token](ntPoolSize)

		p.pools.stacks[id] = NewPool(stackPoolSize, WithConstructor(newStackFactory[*Token](stackLen)))
		p.pools.nonterminals[id] = ntPool
		p.pools.stateStacks[id] = NewPool(stackPoolSize, WithConstructor(newStackFactory[CyclicAutomataState](stackLen)))
		p.pools.producedTokensMap[id] = make(map[*Token]*Token, ntPoolSize)

		p.workers[id] = &coppWorker{
			parser: p,
			id:     id,
			ntPool: ntPool,
		}
	}

	// Sweep and mixed reductions need one more stack and input for the final pass.
	sweeps := p.reductionStrategy == ReductionSweep || p.reductionStrategy == ReductionMixed
	if p.concurrency > 1 && sweeps {
		inputPoolBaseSize := stacksCount[Token](src, p.concurrency, opts.AvgTokenLength)

		p.pools.sweepInput = NewPool(inputPoolBaseSize, WithConstructor(newStack[Token]))
		p.pools.sweepStack = NewPool(stackPoolSize, WithConstructor(newStackFactory[*Token](stackLen)))
		p.pools.sweepStateStack = NewPool(stackPoolSize, WithConstructor(newStackFactory[CyclicAutomataState](stackLen)))
	}

	return p
}

// Parse performs C-OPG parsing of the provided tokensLists, returning the root of the resulting parse tree.
//
// Every list in tokensLists is first parsed by its own worker goroutine, each
// one receiving the first token of the following list as lookahead. The partial
// stacks are then reduced according to the configured reduction strategy:
//   - a parallel pass merges every stack with its right neighbour, producing
//     one stack fewer than before;
//   - a sweep joins all remaining stacks into a single input that worker 0
//     parses sequentially. It is used right away with ReductionSweep and after
//     two parallel passes with ReductionMixed.
//
// The root is read from the first result slot once no reduction is left.
//
// An error is returned when tokensLists is empty, when it holds more lists than
// the parser has workers for, when a worker reports a failure, or when
// LastNonterminal fails on the final stack.
//
// Parse mutates the parser (concurrency level and result slots), so a single
// parser must not run two Parse calls at the same time.
func (p *COPParser) Parse(ctx context.Context, tokensLists []*LOS[Token]) (*Token, error) {
	// Workers, pools and result slots are sized by NewCOPParser; reject inputs
	// they cannot serve instead of indexing out of range (or reading a stale
	// result slot) further down.
	if len(tokensLists) == 0 {
		return nil, fmt.Errorf("no token lists to parse")
	}
	if len(tokensLists) > len(p.workers) {
		return nil, fmt.Errorf("cannot parse %d token lists with %d workers", len(tokensLists), len(p.workers))
	}

	// The deferred cancel covers every return path.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	p.concurrency = len(tokensLists)

	// Both channels have room for one message per worker of the widest pass.
	// A worker therefore never blocks on its send, even when Parse has already
	// returned because another worker failed; with smaller buffers those
	// workers would stay blocked forever.
	resultCh := make(chan parseResult[COPPStack], p.concurrency)
	errCh := make(chan error, p.concurrency)

	// First parallel pass of the algorithm.
	for thread := 0; thread < p.concurrency; thread++ {
		var nextToken *Token

		// If the thread is not the last, take the first token of the next stack as lookahead.
		if thread < p.concurrency-1 {
			nextInputListIter := tokensLists[thread+1].HeadIterator()
			nextToken = nextInputListIter.Next()
		}

		s := NewCOPPStack(p.pools.stacks[thread], p.pools.stateStacks[thread], p.pools.producedTokensMap[thread])
		go p.workers[thread].parse(ctx, s, tokensLists[thread], nextToken, false, resultCh, errCh)
	}

	if err := collectResults[COPPStack](p.results, resultCh, errCh, p.concurrency); err != nil {
		return nil, err
	}

	// If the number of threads is greater than one, results must be combined and work should continue.
	reductionPasses := 0

	// Reduction phase. Inside the loop p.concurrency is the number of workers
	// of the pass, i.e. one less than the number of stacks still to combine.
	for p.concurrency--; p.concurrency >= 1; p.concurrency-- {
		// This branch performs a final sweep. It is taken either if ReductionSweep has been selected as a strategy
		// or if ReductionMixed has already performed the maximum number of parallel passes.
		if p.reductionStrategy == ReductionSweep || (p.reductionStrategy == ReductionMixed && reductionPasses >= 2) {
			// Nullifies the previous p.concurrency-- (concurrency is used by CombineSweepLOS).
			p.concurrency++

			// Create the final input by joining together the stacks from the previous step.
			combined := p.results[0].Combine()
			input, producedTokens := p.CombineSweepLOS(p.pools.sweepInput, p.results[1:])

			// Merge produced tokens maps
			// TODO: Find a better place to handle this.
			for k, v := range producedTokens {
				combined.ProducedTokens[k] = v
			}

			// Sets correct concurrency level for final sweep; the loop's post
			// statement then brings it to zero and ends the reduction phase.
			p.concurrency = 1

			go p.workers[0].parse(ctx, combined, input, nil, true, resultCh, errCh)

			if err := collectResults[COPPStack](p.results, resultCh, errCh, 1); err != nil {
				return nil, err
			}

			continue
		}

		// This branch performs parallel reductions.
		for i := 0; i < p.concurrency; i++ {
			stackLeft := p.results[i]
			stackRight := p.results[i+1]

			combined := stackLeft.Combine()

			// TODO: I should find a way to make this work without creating a new LOS for the inputs.
			// Unfortunately the new stack depends on the content of tokensLists[i] since its elements are stored there.
			// We can't erase the old input easily to reuse its storage.
			input, producedTokens := stackRight.CombineLOS(tokensLists[i].pool)

			// Merge produced tokens maps
			// TODO: Find a better place to handle this.
			for k, v := range producedTokens {
				combined.ProducedTokens[k] = v
			}

			go p.workers[i].parse(ctx, combined, input, nil, true, resultCh, errCh)
		}

		if err := collectResults[COPPStack](p.results, resultCh, errCh, p.concurrency); err != nil {
			return nil, err
		}

		reductionPasses++
	}

	root, err := p.results[0].LastNonterminal()
	if err != nil {
		return nil, err
	}

	return root, nil
}

// coppWorker is one of the parsing threads of a COPParser.
type coppWorker struct {
	// parser is the owning parser; the worker reads its grammar and its
	// current concurrency level.
	// id is the worker's index; it is reported alongside the resulting stack
	// and passed to the grammar's semantic actions.
	parser *COPParser
	id     int

	// ntPool supplies the tokens allocated for newly reduced nonterminals.
	ntPool *Pool[Token]
}

// parse implements COPP: it runs the shift/reduce loop of a single worker over
// tokens, using stack as its working storage.
//
// On the first pass (finalPass == false) the worker delimits its own chunk:
// worker 0 pushes a # onto the stack, any other worker pushes its first input
// token with precedence PrecEmpty; the last worker appends a # to tokens, any
// other worker appends nextToken (the lookahead), when it is not nil. On later
// passes stack and tokens are used as given.
//
// Exactly one message is sent before returning: the resulting stack on
// resultCh, or an error on errCh. Errors are reported when a non-first worker
// receives an empty token list, when no rule matches a right-hand side, or when
// two terminals have no precedence relation. ctx is currently not consulted.
func (w *coppWorker) parse(ctx context.Context, stack *COPPStack, tokens *LOS[Token], nextToken *Token, finalPass bool, resultCh chan<- parseResult[COPPStack], errCh chan<- error) {
	tokensIt := tokens.HeadIterator()

	// Scratch buffers for the right-hand side being built: token types and the
	// tokens themselves, kept in lockstep.
	rhs := make([]TokenType, 0, w.parser.g.MaxPrefixLength)
	rhsTokens := make([]*Token, 0, w.parser.g.MaxPrefixLength)

	// If the thread is the first, push a # onto the stack
	// Otherwise, push the first inputToken onto the stack
	if !finalPass {
		if w.id == 0 {
			stack.Push(&Token{
				Type:       TokenTerm,
				Precedence: PrecEmpty,
			})
		} else {
			t := tokensIt.Next()
			if t == nil {
				// There is nothing to seed the stack with; report it instead
				// of dereferencing a nil token.
				errCh <- fmt.Errorf("worker %d received an empty token list", w.id)
				return
			}

			t.Precedence = PrecEmpty
			stack.Push(t)
		}

		// If the thread is the last, push a # onto the tokensList.
		// Otherwise, push the lookahead token.
		if w.id == w.parser.concurrency-1 {
			tokens.Push(Token{
				Type:       TokenTerm,
				Precedence: PrecEmpty,
			})
		} else if nextToken != nil {
			tokens.Push(*nextToken)
		}
	}

	var prec Precedence

	// prefixCount is used to identify where to cut double occurrences of repeated prefixes.
	var prefixCount int

	// The loop has no post statement: inputToken only advances in the branches
	// that consume it, so a reduction re-examines the same token.
	for inputToken := tokensIt.Next(); inputToken != nil; {
		// Find the first terminal on the stack and get the precedence between it and the current token
		firstTerminal := stack.FirstTerminal()

		if !inputToken.Type.IsTerminal() {
			prec = PrecYields
		} else {
			// TODO: Consider removing this check, it is probably unnecessary.
			if firstTerminal == nil {
				prec = w.parser.g.precedence(TokenTerm, inputToken.Type)
			} else {
				prec = w.parser.g.precedence(firstTerminal.Type, inputToken.Type)

				if prec == PrecEquals && firstTerminal.Precedence == PrecTakes {
					prec = PrecYields
				}

				// This is required to put aside tokens that require the previous one to be matched within a rhs.
				// Since the previous one has Empty precedence, it means that it's unusable.
				if prec == PrecEquals && firstTerminal.Precedence == PrecEmpty {
					prec = PrecTakes
				}
			}
		}

		// If it yields precedence, PUSH the inputToken onto the stack.
		if prec == PrecYields {
			inputToken.Precedence = prec

			if inputToken.Type.IsTerminal() {
				stack.Push(inputToken)
			}

			// If the current construction is a single nonterminal, append the token to it.
			// Otherwise, swap.
			if stack.IsCurrentSingleNonterminal() {
				stack.AppendStateToken(inputToken)
			} else {
				stack.SwapState()
				stack.AppendStateToken(inputToken)
			}

			rhsTokens = rhsTokens[:0]
			rhs = rhs[:0]
			prefixCount = 0

			inputToken = tokensIt.Next()
		} else if prec == PrecEquals {
			inputToken.Precedence = prec

			// If the current construction is a single nonterminal, prepend the previous construction to it.
			if stack.IsCurrentSingleNonterminal() {
				stack.State.CurrentIndex = stack.State.PreviousIndex
				stack.State.CurrentLen += stack.State.PreviousLen
			}

			// Seed the rhs buffers from the current construction the first
			// time through; later iterations keep extending them.
			if len(rhsTokens) == 0 {
				rhsTokens = append(rhsTokens, stack.Current()...)
				for i := range stack.State.CurrentLen {
					rhs = append(rhs, rhsTokens[i].Type)
				}
			}
			rhsTokens = append(rhsTokens, inputToken)
			rhs = append(rhs, inputToken.Type)

			// Try to identify if the current construction matches a prefix.
			lhs, ruleNum := w.parser.g.findPrefixMatch(rhs)
			if lhs == TokenEmpty {
				prefixCount++
				stack.AppendStateToken(inputToken)

				// Replace the topmost token on the stack, keeping its state unchanged.
				_, s := stack.Pop2()
				stack.PushWithState(inputToken, *s)

				inputToken = tokensIt.Next()

				continue
			}

			// Only the tokens preceding the last prefixCount+1 ones take part
			// in the prefix reduction; the rest are re-appended below.
			lhsToken, err := w.matchPrefix(lhs, ruleNum, rhsTokens[:len(rhsTokens)-prefixCount-1], stack)
			if err != nil {
				errCh <- fmt.Errorf("worker %d could not match: %w", w.id, err)
				return
			}

			// Reset state
			stack.StateTokenStack.Tos = stack.State.CurrentIndex
			stack.State.CurrentLen = 0
			stack.AppendStateToken(lhsToken)

			for i := len(rhsTokens) - prefixCount - 1; i < len(rhsTokens); i++ {
				stack.AppendStateToken(rhsTokens[i])
			}

			prefixCount = 0

			// Replace the topmost token on the stack, keeping its state unchanged.
			// TODO: Consider adding a Shift method to stack.
			_, s := stack.Pop2()
			stack.PushWithState(inputToken, *s)

			inputToken = tokensIt.Next()
		} else if prec == PrecTakes {
			// If there are no tokens yielding precedence on the stack, push inputToken onto the stack.
			// Otherwise, perform a reduction.
			if stack.YieldingPrecedence() == 0 {
				if firstTerminal != nil && firstTerminal.Precedence != PrecEmpty {
					firstTerminal.Precedence = PrecTakes
				}

				inputToken.Precedence = prec
				stack.Push(inputToken)

				if inputToken.Type != TokenTerm {
					stack.SwapState()
				}

				inputToken = tokensIt.Next()
			} else {
				// i indexes rhsTokens across both copy loops below.
				var i int

				rhsTokens = rhsTokens[:0]
				rhs = rhs[:0]

				if stack.IsCurrentSingleNonterminal() {
					rhsTokens = append(rhsTokens, stack.Previous()...)
					for i = 0; i < stack.State.PreviousLen; i++ {
						rhs = append(rhs, rhsTokens[i].Type)
					}
				}

				rhsTokens = append(rhsTokens, stack.Current()...)
				for j := 0; j < stack.State.CurrentLen; j++ {
					rhs = append(rhs, rhsTokens[i].Type)

					i++
				}

				_, st := stack.Pop2()
				stack.UpdateFirstTerminal()

				// Prefix is made of a single nonterminal
				if st.CurrentLen == 1 && !stack.StateTokenStack.Data[st.CurrentIndex].IsTerminal() {
					stack.State.PreviousIndex = st.PreviousIndex
					stack.State.PreviousLen = st.PreviousLen
				} else {
					stack.State.PreviousIndex = st.CurrentIndex
					stack.State.PreviousLen = st.CurrentLen
				}

				lhsToken, err := w.match(rhs, rhsTokens, stack)
				if err != nil {
					errCh <- fmt.Errorf("worker %d could not match: %w", w.id, err)
					return
				}

				// Reset state
				stack.StateTokenStack.Tos = stack.State.PreviousIndex + stack.State.PreviousLen + 1
				stack.State.CurrentIndex = stack.StateTokenStack.Tos - 1
				stack.State.CurrentLen = 1
				stack.StateTokenStack.Replace(lhsToken)

				prefixCount = 0
			}

			rhsTokens = rhsTokens[:0]
			rhs = rhs[:0]
		} else {
			// If there's no precedence relation, abort the parsing
			errCh <- fmt.Errorf("no precedence relation found")
			return
		}
	}

	resultCh <- parseResult[COPPStack]{w.id, stack}
}

// matchPrefix runs the semantic action of rule ruleNum for a matched prefix
// whose tokens are rhsTokens and whose left-hand side type is lhs.
//
// The left-hand side token is the parent already recorded in s.ProducedTokens
// for the first rhs token (flag RuleAppend), or a fresh token of type lhs taken
// from the worker's pool (flag RuleCyclic). When the last rhs token has a
// recorded parent, that parent replaces it in rhsTokens and RuleCombine is
// added to the flags. After the action has run, the left-hand side token is
// recorded in s.ProducedTokens under rhsTokens[0].
//
// It returns the original first token of rhsTokens, not the left-hand side
// token, and an error when rhsTokens is empty.
//
// NOTE(review): with a single-element rhsTokens whose token has a parent,
// rhsTokens[0] has already been replaced by that parent when the map is
// updated, so the key differs from the returned token — confirm this is
// intended.
func (w *coppWorker) matchPrefix(lhs TokenType, ruleNum uint16, rhsTokens []*Token, s *COPPStack) (*Token, error) {
	// Indexing an empty rhs would panic inside the worker goroutine; report it
	// through the error return so that the caller can abort the parse.
	if len(rhsTokens) == 0 {
		return nil, fmt.Errorf("could not match prefix for rule %d: empty rhs", ruleNum)
	}

	var lhsToken *Token
	var rf RuleFlags

	firstToken := rhsTokens[0]
	last := len(rhsTokens) - 1

	if parentToken, ok := s.ProducedTokens[firstToken]; ok {
		lhsToken = parentToken

		rf = rf.Set(RuleAppend)
	} else {
		lhsToken = w.ntPool.Get()
		lhsToken.Type = lhs

		rf = rf.Set(RuleCyclic)
	}

	if rightParent, ok := s.ProducedTokens[rhsTokens[last]]; ok {
		rhsTokens[last] = rightParent
		rf = rf.Set(RuleCombine)
	}

	// Execute the semantic action
	w.parser.g.Func(ruleNum, rf, lhsToken, rhsTokens, w.id)

	s.ProducedTokens[rhsTokens[0]] = lhsToken

	return firstToken, nil
}

// match reduces a complete right-hand side: it looks up the rule matching rhs,
// runs its semantic action on rhsTokens and returns the left-hand side token.
//
// The left-hand side token is the parent recorded in s.ProducedTokens for the
// first rhs token when there is one, otherwise a fresh token of the matched
// type taken from the worker's pool. When the last rhs token has a recorded
// parent, that parent replaces it in rhsTokens.
//
// A single flag value is passed to the semantic action: RuleCyclic for a fresh
// token, RuleAppend for a reused parent, and RuleCombine whenever the last rhs
// token has a parent; RuleCombine overrides the other two.
//
// An error is returned when no rule matches rhs.
func (w *coppWorker) match(rhs []TokenType, rhsTokens []*Token, s *COPPStack) (*Token, error) {
	lhs, ruleNum := w.parser.g.findRuleMatch(rhs)
	if lhs == TokenEmpty {
		return nil, fmt.Errorf("could not find match for rhs %v", rhs)
	}

	flag := RuleCyclic

	result, hasParent := s.ProducedTokens[rhsTokens[0]]
	if hasParent {
		flag = RuleAppend
	} else {
		result = w.ntPool.Get()
		result.Type = lhs
	}

	tail := len(rhsTokens) - 1
	if tailParent, found := s.ProducedTokens[rhsTokens[tail]]; found {
		rhsTokens[tail] = tailParent
		flag = RuleCombine
	}

	// Run the semantic action attached to the matched rule.
	w.parser.g.Func(ruleNum, flag, result, rhsTokens, w.id)

	return result, nil
}

// CombineSweepLOS builds the input of the final sweep out of the first
// p.concurrency-1 entries of stacks.
//
// The selected tokens are copied into a new LOS backed by pool. Before being
// copied, a token has its precedence reset to PrecEmpty (on the original token
// as well, since it is modified through its pointer). Tokens are tracked by
// pointer so that none is copied twice. For every stack:
//   - the first token and the tokens of its state are marked as seen without
//     being copied;
//   - a token with precedence PrecEquals contributes only when it is the last
//     one of the stack, in which case the tokens of its state and then the
//     stack's Previous() and Current() tokens are copied;
//   - any other token contributes the tokens of its state, then itself.
//
// The returned map associates each copy with the parent recorded for the
// original token in the ProducedTokens of its source stack, if any.
func (p *COPParser) CombineSweepLOS(pool *Pool[stack[Token]], stacks []*COPPStack) (*LOS[Token], map[*Token]*Token) {
	input := NewLOS[Token](pool)
	newProducedTokens := make(map[*Token]*Token)

	// The length of the first stack is only a size hint; skip it when there
	// are no stacks rather than indexing an empty slice.
	var tokenSet map[*Token]struct{}
	if len(stacks) > 0 {
		tokenSet = make(map[*Token]struct{}, stacks[0].Length())
	} else {
		tokenSet = make(map[*Token]struct{})
	}

	// pushUnseen copies tok into the input unless it has been seen already,
	// carrying over the parent recorded for it in src.
	pushUnseen := func(src *COPPStack, tok *Token) {
		if _, seen := tokenSet[tok]; seen {
			return
		}

		tok.Precedence = PrecEmpty

		newToken := input.Push(*tok)
		if parentToken, ok := src.ProducedTokens[tok]; ok {
			newProducedTokens[newToken] = parentToken
		}

		tokenSet[tok] = struct{}{}
	}

	for i := 0; i < p.concurrency-1; i++ {
		src := stacks[i]
		it := src.Iterator()

		// Ignore the first token.
		t, st := it.Next()
		tokenSet[t] = struct{}{}
		if t == nil {
			// The stack holds no tokens, so there is no state to inspect.
			continue
		}
		for _, stateToken := range src.StateTokenStack.Slice(st.CurrentIndex, st.CurrentLen) {
			tokenSet[stateToken] = struct{}{}
		}

		for t, st = it.Next(); t != nil; t, st = it.Next() {
			if t.Precedence == PrecEquals {
				if !it.IsLast() {
					continue
				}

				for _, stateToken := range src.StateTokenStack.Slice(st.CurrentIndex, st.CurrentLen) {
					pushUnseen(src, stateToken)
				}

				for _, stateToken := range src.Previous() {
					pushUnseen(src, stateToken)
				}

				for _, stateToken := range src.Current() {
					pushUnseen(src, stateToken)
				}

				continue
			}

			for _, stateToken := range src.StateTokenStack.Slice(st.CurrentIndex, st.CurrentLen) {
				pushUnseen(src, stateToken)
			}

			pushUnseen(src, t)
		}
	}

	return input, newProducedTokens
}