Skip to content
This repository has been archived by the owner on May 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #47 from JuliaML/ox/neodataframes
Browse files Browse the repository at this point in the history
Update for DataFrameRow changes
  • Loading branch information
oxinabox authored Apr 5, 2019
2 parents 67c9271 + 4b77d0d commit 2f845c6
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
2 changes: 1 addition & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ StatsBase 0.13
LearnBase 0.2.0 0.3.0
MLLabelUtils 0.4.0
MLDataPattern 0.5.0
DataFrames 0.11
DataFrames 0.17
13 changes: 12 additions & 1 deletion src/datapattern.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,27 @@ import DataFrames: DataFrames, AbstractDataFrame, SubDataFrame
# The number of observations in a data frame is its number of rows.
function LearnBase.nobs(dt::AbstractDataFrame)
    return DataFrames.nrow(dt)
end
# Fetch the row(s) selected by `idx`, keeping every column.
function LearnBase.getobs(dt::AbstractDataFrame, idx)
    return dt[idx, :]
end

# A `DataFrameRow` is itself an observation, so it always counts as exactly one.
function LearnBase.nobs(dt::DataFrameRow)
    return 1
end
# Return the row itself when it is asked for its one and only observation.
#
# A `DataFrameRow` holds exactly one observation, so the only accepted values of
# `idx` are the scalar `1` and any index collection that compares equal to `1:1`
# (e.g. `1:1` or `[1]`). Any other `idx` throws an `ArgumentError`.
function LearnBase.getobs(dt::DataFrameRow, idx)
    # `1 == 1:1` is `false` in Julia, so the scalar index needs its own check;
    # otherwise `getobs(row, 1)` would be rejected as a multi-row request.
    if !(idx == 1 || idx == 1:1)
        throw(ArgumentError(
            "Attempting to read multiple rows ($idx) with a single row"))
    end

    return dt
end

# Custom data subset in form of a lazy view into `dt` (no rows are copied).
# All columns are kept; `idx` selects the rows. The accompanying tests expect a
# `DataFrameRow` for a scalar `idx` and a `SubDataFrame` for a range of indices.
#
# Only the two-argument indexing form `@view dt[idx, :]` is kept as the body;
# the older `view(dt, idx)` line was the superseded implementation.
function LearnBase.datasubset(dt::AbstractDataFrame, idx, ::ObsDim.Undefined)
    return @view dt[idx, :]
end

# When no target extraction function is supplied (i.e. `identity` is passed),
# delegate to `_throw_table_error()` instead of returning the table itself.
function LearnBase.gettarget(::typeof(identity), dt::AbstractDataFrame)
    return _throw_table_error()
end
# Same rule for a single row: passing `identity` as the target extraction
# function is rejected via `_throw_table_error()`.
function LearnBase.gettarget(::typeof(identity), dt::DataFrameRow)
    return _throw_table_error()
end

# Convenience syntax: a column name may be given in place of an extraction
# function. The target is the value of column `col` in the first row of `dt`.
function LearnBase.gettarget(col::Symbol, dt::AbstractDataFrame)
    return dt[1, col]
end
# Column-name convenience syntax for a single row: the target is the row's
# value in column `col`.
function LearnBase.gettarget(col::Symbol, dt::DataFrameRow)
    return dt[col]
end
# Generic case: apply the user-supplied extraction function `fun` to the data
# frame and return whatever it produces.
function LearnBase.gettarget(fun, dt::AbstractDataFrame)
    return fun(dt)
end

# avoid copy when target extraction function is supplied
Expand Down
18 changes: 11 additions & 7 deletions test/tst_datapattern.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,24 @@
df = DataFrame(x1 = rand(5), x2 = rand(5), y = [:a,:a,:b,:a,:b])

@test_throws ArgumentError targets(df)
@test_throws ArgumentError targets(df[1,:]) # DataFrameRow

@test targets(:y, df) == [:a,:a,:b,:a,:b]
@test targets(row->row[1,:y], df) == [:a,:a,:b,:a,:b]
@test targets(row->row[:y], df) == [:a,:a,:b,:a,:b]
@test typeof(targets(:y, df)) == Vector{Symbol}
@test eltype(targets(x->x, df)) <: SubDataFrame

@test nobs(@inferred(undersample(:y, df))) === 4

@test @inferred(getobs(df, 2)) == df[2,:]
@test @inferred(getobs(datasubset(df, 2))) == df[2,:]
@test @inferred(getobs(df, 2:3)) == df[2:3,:]
@test @inferred(getobs(datasubset(df, 2:3))) == df[2:3,:]

@test typeof(datasubset(df, 2)) <: SubDataFrame
@test @inferred(datasubset(df, [1,2,3,5])) == view(df, [1,2,3,5])
@test @inferred(datasubset(df, 2)) == view(df, 2)
@test @inferred(datasubset(datasubset(df, 2:3), 2)) == view(df, 3)
@test @inferred(datasubset(df)) == view(df, 1:5)

@test typeof(datasubset(df, 2)) <: DataFrameRow
@test typeof(datasubset(df, 2:5)) <: SubDataFrame
@test @inferred(datasubset(df, [1,2,3,5])) == df[[1,2,3,5], :]
@test @inferred(datasubset(df, 2)) == df[2, :]
@test @inferred(datasubset(datasubset(df, 2:3), 2)) == df[3, :]
@test @inferred(datasubset(df)) == df
end

0 comments on commit 2f845c6

Please sign in to comment.