Skip to content

Commit

Permalink
RDS roundtrip test with R closures
Browse files Browse the repository at this point in the history
Several tests are not yet passing because character encodings are not fully implemented
  • Loading branch information
breitnw committed Jul 15, 2024
1 parent 698de95 commit 4a7a285
Show file tree
Hide file tree
Showing 10 changed files with 283 additions and 125 deletions.
3 changes: 2 additions & 1 deletion src/main/java/org/prlprg/bc/Bc.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ public Builder() {

public Builder(List<SEXP> consts) {
code = new BcCode.Builder();
this.consts = new ConstPool.Builder(consts);
this.consts = new ConstPool.Builder(consts.size());
this.consts.addAll(consts);
}

public void setTrackSrcRefs(boolean track) {
Expand Down
59 changes: 48 additions & 11 deletions src/main/java/org/prlprg/bc/ConstPool.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import com.google.common.collect.ImmutableList;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -125,31 +124,59 @@ public static class Builder {
private final List<SEXP> values;

public Builder() {
this(Collections.emptyList());
this.index = new HashMap<>();
this.values = new ArrayList<>();
}

public Builder(List<SEXP> consts) {
index = new HashMap<>(consts.size());
values = new ArrayList<>(consts.size());

for (var e : consts) {
add(e);
}
/**
 * Creates an empty builder whose backing collections are pre-sized.
 *
 * @param expectedSize value handed to both the lookup map and the value list as their initial
 *     capacity
 */
public Builder(int expectedSize) {
  // The map is created first so that an invalid size is rejected by the same collection as before.
  index = new HashMap<>(expectedSize);
  values = new ArrayList<>(expectedSize);
}

public <S extends SEXP> Idx<S> add(S c) {
var i =
index.computeIfAbsent(
c,
(ignored) -> {
var x = index.size();
var x = values.size();
values.add(c);
return x;
});

return Idx.create(i, c);
}

/**
 * Feeds every element of {@code consts}, in iteration order, through {@code add}.
 *
 * @apiNote Since each element goes through {@code add}, a constant that occurs several times in
 *     {@code consts} is stored only once. The builder can therefore end up holding fewer entries
 *     than the list has elements. When positions in the pool must line up with positions in the
 *     list, use {@link #addAllPreservingIndices(List) addAllPreservingIndices} instead.
 * @param consts the constants to add
 */
public void addAll(List<SEXP> consts) {
  consts.forEach(this::add);
}

/**
 * Appends every element of {@code consts} to the pool without de-duplicating, so each element
 * keeps its relative position from the supplied list.
 *
 * <p>Every element is appended to the value list, even if an equal element is already present.
 * The lookup index entry for an element is overwritten each time, so for repeated elements it
 * refers to the most recently appended position.
 *
 * @apiNote Only use this method when the indices of the supplied list must align with those of
 *     the constant pool and the list may contain duplicates (e.g., when reading a constant pool
 *     from RDS). To add a list of constants while guaranteeing that the pool contains no
 *     duplicates, invoke {@link #addAll(List) addAll} on an empty {@link ConstPool.Builder}.
 * @param consts the constants to append, in order
 */
public void addAllPreservingIndices(List<SEXP> consts) {
  for (var constant : consts) {
    // The position of the element about to be appended is the current size of the value list.
    var position = values.size();
    index.put(constant, position);
    values.add(constant);
  }
}

/**
* Finish building the pool.
*
Expand Down Expand Up @@ -180,11 +207,21 @@ public Idx<RegSymSXP> indexSym(int i) {
return index(i, RegSymSXP.class);
}

// FIXME: do we need this?
// FIXME: do we need these? ---
/**
 * Resolves a raw constant-pool position to a {@code LangSXP} index, treating negative positions
 * as "absent".
 *
 * @param i the raw position
 * @return {@code null} when {@code i} is negative; otherwise whatever {@code orNil(i,
 *     LangSXP.class)} yields
 */
public @Nullable Idx<LangSXP> indexLangOrNilIfNegative(int i) {
  if (i < 0) {
    return null;
  }
  return orNil(i, LangSXP.class);
}

/**
 * Resolves a raw constant-pool position to an {@code IntSXP} index, treating negative positions
 * as "absent".
 *
 * @param i the raw position
 * @return {@code null} when {@code i} is negative; otherwise whatever {@code orNil(i,
 *     IntSXP.class)} yields
 */
public @Nullable Idx<IntSXP> indexIntOrNilIfNegative(int i) {
  if (i < 0) {
    return null;
  }
  return orNil(i, IntSXP.class);
}

/**
 * Resolves a raw constant-pool position to a {@code StrSXP} index, treating negative positions as
 * "absent".
 *
 * @param i the raw position
 * @return {@code null} when {@code i} is negative; otherwise whatever {@code orNil(i,
 *     StrSXP.class)} yields
 */
public @Nullable Idx<StrSXP> indexStrOrNilIfNegative(int i) {
  if (i < 0) {
    return null;
  }
  return orNil(i, StrSXP.class);
}

// -- FIXME

/**
 * Resolves a raw constant-pool position to a {@code StrOrRegSymSXP} index by delegating to {@code
 * orNil}; no sign check is performed here.
 *
 * @param i the raw position
 * @return the result of {@code orNil(i, StrOrRegSymSXP.class)}, which may be {@code null}
 */
public @Nullable Idx<StrOrRegSymSXP> indexStrOrSymOrNil(int i) {
  Idx<StrOrRegSymSXP> resolved = orNil(i, StrOrRegSymSXP.class);
  return resolved;
}
Expand Down
15 changes: 8 additions & 7 deletions src/main/java/org/prlprg/rds/GNURByteCodeDecoderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ class GNURByteCodeDecoderFactory {
GNURByteCodeDecoderFactory(ImmutableIntArray byteCode, List<SEXP> consts) {
this.byteCode = byteCode;

cpb = new ConstPool.Builder(consts);
cpb = new ConstPool.Builder(consts.size());
cpb.addAllPreservingIndices(consts);
cbb = new BcCode.Builder();
labelMapping = LabelMapping.fromGNUR(byteCode);

Expand Down Expand Up @@ -213,9 +214,9 @@ BcInstr decode() {
case DUP2ND -> new BcInstr.Dup2nd();
case SWITCH -> {
var ast = cpb.indexLang(byteCode.get(curr++));
var names = cpb.indexStrOrNil(byteCode.get(curr++));
var chrLabelsIdx = cpb.indexIntOrNil(byteCode.get(curr++));
var numlabelsIdx = cpb.indexIntOrNil(byteCode.get(curr++));
var names = cpb.indexStrOrNilIfNegative(byteCode.get(curr++));
var chrLabelsIdx = cpb.indexIntOrNilIfNegative(byteCode.get(curr++));
var numLabelsIdx = cpb.indexIntOrNilIfNegative(byteCode.get(curr++));

// If the switch does not have any named labels, this will be null.
if (chrLabelsIdx != null) {
Expand All @@ -226,11 +227,11 @@ BcInstr decode() {
// case of empty switch?
// in some cases, the number labels can be the same as the chrLabels
// and we do not want to remap twice
if (numlabelsIdx != null && !numlabelsIdx.equals(chrLabelsIdx)) {
cpb.reset(numlabelsIdx, this::remapLabels);
if (numLabelsIdx != null && !numLabelsIdx.equals(chrLabelsIdx)) {
cpb.reset(numLabelsIdx, this::remapLabels);
}

yield new BcInstr.Switch(ast, names, chrLabelsIdx, numlabelsIdx);
yield new BcInstr.Switch(ast, names, chrLabelsIdx, numLabelsIdx);
}
case RETURNJMP -> new BcInstr.ReturnJmp();
case STARTSUBSET_N ->
Expand Down
113 changes: 81 additions & 32 deletions src/main/java/org/prlprg/rds/GNURByteCodeEncoderFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,51 @@
import org.prlprg.bc.Bc;
import org.prlprg.bc.BcCode;
import org.prlprg.bc.BcInstr;
import org.prlprg.bc.ConstPool;
import org.prlprg.sexp.IntSXP;
import org.prlprg.sexp.SEXPs;

public class GNURByteCodeEncoderFactory {
private final BcCode bc;
private final ImmutableIntArray.Builder builder;
private final LabelMapping mapping;
private final LabelMapping labelMapping;
private final ConstPool.Builder cpb;

GNURByteCodeEncoderFactory(BcCode bc) {
this.bc = bc;
GNURByteCodeEncoderFactory(Bc bc) {
this.bc = bc.code();
this.builder = ImmutableIntArray.builder();
this.mapping = LabelMapping.toGNUR(bc);
this.labelMapping = LabelMapping.toGNUR(this.bc);
this.cpb = new ConstPool.Builder(bc.consts().size());
this.cpb.addAllPreservingIndices(bc.consts());
}

public ImmutableIntArray buildRaw() {
/**
 * Result of {@code buildRaw()}: the flat integer encoding of the instructions together with the
 * constant pool produced alongside it.
 */
public static class GNURByteCode {
  private final ImmutableIntArray instructions;
  private final ConstPool consts;

  /** Instances are only created by the enclosing factory. */
  private GNURByteCode(ImmutableIntArray rawInstructions, ConstPool constPool) {
    instructions = rawInstructions;
    consts = constPool;
  }

  /**
   * @return the encoded instruction stream
   */
  public ImmutableIntArray getInstructions() {
    return instructions;
  }

  /**
   * @return the constant pool that accompanies the instruction stream
   */
  public ConstPool getConsts() {
    return consts;
  }
}

public GNURByteCode buildRaw() {
// Write the bytecode version first
builder.add(Bc.R_BC_VERSION);
// Write the serialized instruction, containing the opcode and the arguments
for (var instr : bc) {
// Add the opcode
builder.add(instr.op().value());
// Add the arguments
var args = args(instr);
var args = args(instr, cpb);
if (args.length != instr.op().nArgs())
throw new AssertionError(
"Sanity check failed: number of arguments "
Expand All @@ -34,22 +58,23 @@ public ImmutableIntArray buildRaw() {
+ " is not equal to instr.op().nArgs()");
builder.addAll(args);
}
return builder.build();
return new GNURByteCode(builder.build(), cpb.build());
}

/** Converts the arguments of the provided BcInstr to a "raw" format; i.e. an array of integers */
public int[] args(@NotNull BcInstr instr) {
public int[] args(@NotNull BcInstr instr, ConstPool.Builder cpb) {
return switch (instr) {
case BcInstr.Goto i -> new int[] {mapping.extract(i.label())};
case BcInstr.BrIfNot i -> new int[] {i.ast().idx(), mapping.extract(i.label())};
case BcInstr.StartLoopCntxt i -> new int[] {i.isForLoop() ? 1 : 0, mapping.extract(i.end())};
case BcInstr.Goto i -> new int[] {labelMapping.extract(i.label())};
case BcInstr.BrIfNot i -> new int[] {i.ast().idx(), labelMapping.extract(i.label())};
case BcInstr.StartLoopCntxt i ->
new int[] {i.isForLoop() ? 1 : 0, labelMapping.extract(i.end())};
case BcInstr.EndLoopCntxt i ->
new int[] {
i.isForLoop() ? 1 : 0,
};
case BcInstr.StartFor i ->
new int[] {i.ast().idx(), i.elemName().idx(), mapping.extract(i.step())};
case BcInstr.StepFor i -> new int[] {mapping.extract(i.body())};
new int[] {i.ast().idx(), i.elemName().idx(), labelMapping.extract(i.step())};
case BcInstr.StepFor i -> new int[] {labelMapping.extract(i.body())};
case BcInstr.LdConst i -> new int[] {i.constant().idx()};
case BcInstr.GetVar i -> new int[] {i.name().idx()};
case BcInstr.DdVal i -> new int[] {i.name().idx()};
Expand All @@ -60,6 +85,7 @@ public int[] args(@NotNull BcInstr instr) {
case BcInstr.GetBuiltin i -> new int[] {i.name().idx()};
case BcInstr.GetIntlBuiltin i -> new int[] {i.name().idx()};
case BcInstr.MakeProm i -> new int[] {i.code().idx()};
case BcInstr.SetTag i -> new int[] {i.tag() == null ? -1 : i.tag().idx()};
case BcInstr.PushConstArg i -> new int[] {i.constant().idx()};
case BcInstr.Call i -> new int[] {i.ast().idx()};
case BcInstr.CallBuiltin i -> new int[] {i.ast().idx()};
Expand All @@ -83,19 +109,22 @@ public int[] args(@NotNull BcInstr instr) {
case BcInstr.And i -> new int[] {i.ast().idx()};
case BcInstr.Or i -> new int[] {i.ast().idx()};
case BcInstr.Not i -> new int[] {i.ast().idx()};
case BcInstr.StartSubassign i -> new int[] {i.ast().idx(), mapping.extract(i.after())};
case BcInstr.StartAssign i -> new int[] {i.name().idx()};
case BcInstr.EndAssign i -> new int[] {i.name().idx()};
case BcInstr.StartSubset i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.StartSubassign i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.StartC i -> new int[] {i.ast().idx()};
case BcInstr.StartSubset2 i -> new int[] {i.ast().idx()};
case BcInstr.StartSubassign2 i -> new int[] {i.ast().idx()};
case BcInstr.Dollar i -> new int[] {i.ast().idx()};
case BcInstr.DollarGets i -> new int[] {i.ast().idx()};
case BcInstr.StartSubset2 i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.StartSubassign2 i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.Dollar i -> new int[] {i.ast().idx(), i.member().idx()};
case BcInstr.DollarGets i -> new int[] {i.ast().idx(), i.member().idx()};
case BcInstr.VecSubset i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.MatSubset i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.VecSubassign i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.MatSubassign i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.And1st i -> new int[] {i.ast().idx(), mapping.extract(i.shortCircuit())};
case BcInstr.And1st i -> new int[] {i.ast().idx(), labelMapping.extract(i.shortCircuit())};
case BcInstr.And2nd i -> new int[] {i.ast().idx()};
case BcInstr.Or1st i -> new int[] {i.ast().idx(), mapping.extract(i.shortCircuit())};
case BcInstr.Or1st i -> new int[] {i.ast().idx(), labelMapping.extract(i.shortCircuit())};
case BcInstr.Or2nd i -> new int[] {i.ast().idx()};
case BcInstr.GetVarMissOk i -> new int[] {i.name().idx()};
case BcInstr.DdValMissOk i -> new int[] {i.name().idx()};
Expand All @@ -104,27 +133,42 @@ public int[] args(@NotNull BcInstr instr) {
case BcInstr.EndAssign2 i -> new int[] {i.name().idx()};
case BcInstr.SetterCall i -> new int[] {i.ast().idx(), i.valueExpr().idx()};
case BcInstr.GetterCall i -> new int[] {i.ast().idx()};
case BcInstr.Switch i ->
new int[] {
i.ast().idx(),
i.names() == null ? -1 : i.names().idx(),
i.chrLabelsIdx() == null ? -1 : i.chrLabelsIdx().idx(),
i.numLabelsIdx() == null ? -1 : i.numLabelsIdx().idx(),
};
case BcInstr.Switch i -> {
var chrLabelsIdx = i.chrLabelsIdx();
var numLabelsIdx = i.numLabelsIdx();

// Map the contents of the IntSXP referenced at i.chrLabelsIndex to the updated label
// positions
if (chrLabelsIdx != null) {
cpb.reset(chrLabelsIdx, this::remapLabels);
}
// Map the contents of the IntSXP referenced at i.numLabelsIndex to the updated label
// positions
if (numLabelsIdx != null && !numLabelsIdx.equals(chrLabelsIdx)) {
cpb.reset(numLabelsIdx, this::remapLabels);
}
yield new int[] {
i.ast().idx(),
i.names() == null ? -1 : i.names().idx(),
i.chrLabelsIdx() == null ? -1 : i.chrLabelsIdx().idx(),
i.numLabelsIdx() == null ? -1 : i.numLabelsIdx().idx(),
};
}

case BcInstr.StartSubsetN i ->
new int[] {
i.ast().idx(), mapping.extract(i.after()),
i.ast().idx(), labelMapping.extract(i.after()),
};
case BcInstr.StartSubassignN i ->
new int[] {
i.ast().idx(), mapping.extract(i.after()),
i.ast().idx(), labelMapping.extract(i.after()),
};
case BcInstr.VecSubset2 i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.MatSubset2 i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.VecSubassign2 i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.MatSubassign2 i -> new int[] {i.ast() == null ? -1 : i.ast().idx()};
case BcInstr.StartSubset2N i -> new int[] {i.ast().idx(), mapping.extract(i.after())};
case BcInstr.StartSubassign2N i -> new int[] {i.ast().idx(), mapping.extract(i.after())};
case BcInstr.StartSubset2N i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.StartSubassign2N i -> new int[] {i.ast().idx(), labelMapping.extract(i.after())};
case BcInstr.SubsetN i -> new int[] {i.ast() == null ? -1 : i.ast().idx(), i.n()};
case BcInstr.Subset2N i -> new int[] {i.ast() == null ? -1 : i.ast().idx(), i.n()};
case BcInstr.SubassignN i -> new int[] {i.ast() == null ? -1 : i.ast().idx(), i.n()};
Expand All @@ -139,11 +183,16 @@ public int[] args(@NotNull BcInstr instr) {
case BcInstr.Colon i -> new int[] {i.ast().idx()};
case BcInstr.SeqAlong i -> new int[] {i.ast().idx()};
case BcInstr.SeqLen i -> new int[] {i.ast().idx()};
case BcInstr.BaseGuard i -> new int[] {i.expr().idx(), mapping.extract(i.ifFail())};
case BcInstr.BaseGuard i -> new int[] {i.expr().idx(), labelMapping.extract(i.ifFail())};
case BcInstr.DeclnkN i -> new int[] {i.n()};

// Otherwise, there are no arguments we need to serialize
default -> new int[0];
};
}

/**
 * Builds a new integer vector in which every element of {@code oldLabels} has been passed through
 * {@code labelMapping.getTarget}.
 *
 * @param oldLabels the label vector to translate; it is not modified here
 * @return a fresh vector created via {@code SEXPs.integer} from the translated values
 */
private IntSXP remapLabels(IntSXP oldLabels) {
  var translated = oldLabels.data().stream().map(labelMapping::getTarget);
  return SEXPs.integer(translated.toArray());
}
}
22 changes: 22 additions & 0 deletions src/main/java/org/prlprg/rds/RDSLogger.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
package org.prlprg.rds;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
import java.util.*;
import java.util.function.Supplier;
import java.util.logging.*;
import org.prlprg.AppConfig;

/**
 * Method-level marker carrying a component name. It is retained at runtime, so it can be read
 * reflectively.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
@interface RDSComponentWrite {
  /** The name associated with the annotated method. */
  String name();
}

final class RDSLogger {
private final StringBuilder output = new StringBuilder();
private final boolean shouldLog;
private int indentLevel;

// private Stack<String> componentLabels;
// private String opLabel;

/**
* @param name a description of the read or write, printed at the start of logging
*/
Expand All @@ -26,6 +39,11 @@ public static void addHandler(Handler handler) {
handler.setLevel(Level.FINE);
}

/** Updates the component currently being written */
// private void updateComponent() {
// Thread.currentThread().getStackTrace()[0].;
// }

/**
* Logs a lazily-evaluated String with the current indent level with Level.FINE.
*
Expand All @@ -39,6 +57,10 @@ private void logString(Supplier<String> msg) {
}
}

// public void setOpLabel(String opLabel) {
// this.opLabel = opLabel;
// }

/**
* Logs the reading or writing of a new SEXP by printing an identifier and increasing the
* indentation level by 1
Expand Down
Loading

0 comments on commit 4a7a285

Please sign in to comment.