Skip to content

Commit

Permalink
fix: split function accounting for quotes (#158)
Browse files Browse the repository at this point in the history
* fix #157: split function accounting for quotes

Added Util.splitCSVLine() function that splits a CSV-formatted string into an array of tokens, accounting for quotes (single and double) and escape characters.

* fix CI warning

Warning: "src/util/Util.lua:263:24: (W311) value assigned to variable sep is unused"

* update split function

- renamed from splitCSVLine to splitEnhanced, since it's not compliant to RFC 4180 and it could be  deceiving;
- added optional parameters;
- now throws error if quotes are not closed;

* update Util.splitEnhanced()

- fixed its behaviour to comply with Casbin documentation (see the note at https://casbin.org/docs/policy-storage#loading-policy-from-a-csv-file and issue 886 at casbin/casbin):
  "If your file contains commas and double quotes, you should enclose the field in double quotes and double any embedded double quotes."

Therefore I removed the extra behaviour related to single quotes ' and escape character \ and refactored the function.

* added example with double quotes

* Unit test for Util.splitEnhanced()

* fixed basic with regex example

* Update basic_policy_with_regex.csv

typo

* Unit test for regexMatch with {N,M} quantifier

* more unit tests for Util.splitEnhanced

- check if the last field is a quoted field
- throwing error when there are extra characters after the double quote that closes the quoted field.

* Update Util.lua

- support for quotes in last field;
- throws an exception if there are other characters after the double quote that closes the quoted field.

* changed "sep" parameter name to "delim"

(uniform to Util.split() )

* changed "line" parameter name to "str"

(uniform to Util.split() )
  • Loading branch information
mikyll authored Jul 10, 2024
1 parent 24bd8d6 commit f40b2ea
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 1 deletion.
11 changes: 11 additions & 0 deletions examples/basic_model_with_regex.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[request_definition]
r = sub, obj, act

[policy_definition]
p = sub, obj, act

[policy_effect]
e = some(where (p.eft == allow))

[matchers]
m = regexMatch(r.sub, p.sub) && regexMatch(r.obj, p.obj) && regexMatch(r.act, p.act)
3 changes: 3 additions & 0 deletions examples/basic_policy_with_regex.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
p, ^admin$, .*, .*
p, ^user.*, "^/users/[a-zA-Z0-9]{4,10}$", PUT|POST|PATCH|GET|OPTIONS
p, ^guest$, ^/guest/.*, GET|OPTIONS
2 changes: 1 addition & 1 deletion src/persist/Adapter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ function Adapter.loadPolicyLine(line, model)
return
end

local tokens = Util.split(line, ",")
local tokens = Util.splitEnhanced(line, ',', true)
local key = tokens[1]
local sec = key:sub(1, 1)

Expand Down
81 changes: 81 additions & 0 deletions src/util/Util.lua
Original file line number Diff line number Diff line change
Expand Up @@ -252,4 +252,85 @@ function Util.printTable(t)
return s
end

function Util.isOnlyWhitespaces(str)
return str:match("^%s*$") ~= nil
end

function Util.splitEnhanced(str, delim, trimFields)
local result = {}
local i = 1
local quotedField
local escaping = false
local field = ""

if delim == nil then delim = ',' end
if trimFields == nil then trimFields = true end

-- Loop over the characters of the string
while i <= #str do
local char = str:sub(i, i)

-- Check if it's the first character and it's a double quote.
if Util.isOnlyWhitespaces(field) and char == '"' then
-- Then this field is a quoted field
quotedField = true
else
if quotedField then
if escaping then
-- ", End of quoted field
if char == delim then
if trimFields then
field = Util.trim(field)
end

table.insert(result, field)
field = ""
quotedField = false
-- "" Escapes the double quote character
elseif char == '"' then
field = field .. char
escaping = false
-- " followed by some other character (not allowed)
else
error("Quoted fields cannot have extra characters outside double quotes.")
end
else -- Not escaping
if char == '"' then
escaping = true
else
field = field .. char
end
end

else -- Not quotedField
if char == delim then
if trimFields then
field = Util.trim(field)
end

table.insert(result, field)
field = ""
else
field = field .. char
end
end
end

i = i + 1
end

-- Throw error if there are quotes left open
if quotedField and not escaping then
error("Unmatched quotes.")
end

-- Add the last field (since it won't have the delimiter after it)
if trimFields then
field = Util.trim(field)
end
table.insert(result, field)

return result
end

return Util
24 changes: 24 additions & 0 deletions tests/main/enforcer_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -515,4 +515,28 @@ describe("Enforcer tests", function ()
assert.is.True(e:enforce("bob", "data2", "write"))
assert.is.False(e:enforce("bogus", "data2", "write")) -- Non-existent subject
end)


it("regexMatch test", function ()

local model = path .. "/examples/basic_model_with_regex.conf"
local policy = path .. "/examples/basic_policy_with_regex.csv"
local e = Enforcer:new(model, policy)

assert.is.True(e:enforce("admin", "/", "PUT"))
assert.is.True(e:enforce("admin", "/admin", "GET"))
assert.is.True(e:enforce("admin", "/admin/anything", "POST"))
assert.is.False(e:enforce("admin123", "/admin", "PUT"))

assert.is.True(e:enforce("user", "/users/alice", "GET"))
assert.is.True(e:enforce("user", "/users/alice", "PUT"))
assert.is.True(e:enforce("user123", "/users/alice", "PUT"))
assert.is.False(e:enforce("user", "/users/", "PUT"))
assert.is.False(e:enforce("user", "/users/123", "PUT"))
assert.is.False(e:enforce("user", "/users/alice123456789", "PUT"))
assert.is.False(e:enforce("user", "/admin", "PUT"))

assert.is.True(e:enforce("guest", "/guest/test", "GET"))
assert.is.False(e:enforce("guest", "/guest/test", "PUT"))
end)
end)
29 changes: 29 additions & 0 deletions tests/util/util_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,35 @@ describe("util tests", function()
assert.are.same({"a", "b", "c"}, Util.split(" a, b ,c ", ","))
end)

it("test isOnlyWhitespaces", function()
assert.is.True(Util.isOnlyWhitespaces(" "))
assert.is.True(Util.isOnlyWhitespaces(""))
assert.is.True(Util.isOnlyWhitespaces("\t\t"))
assert.is.False(Util.isOnlyWhitespaces(" abc"))
assert.is.False(Util.isOnlyWhitespaces("abc\t"))
assert.is.False(Util.isOnlyWhitespaces("\""))
end)

it("test splitEnhanced", function()
assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a ,b ,c", ",", true))
assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a,b,c", ",", true))
assert.are.same({"a", "b", "c"}, Util.splitEnhanced("a, b, c", ",", true))
assert.are.same({"a", "b", "c"}, Util.splitEnhanced(" a, b ,c ", ",", true))

assert.are.same({"a", " b", " c"}, Util.splitEnhanced('a, b, c', ",", false))
assert.are.same({"a", "b", "c"}, Util.splitEnhanced('a,b,c', ",", false))
assert.are.same({" a", "b", "c"}, Util.splitEnhanced(' a,b,c', ",", false))
assert.are.same({"a, b", "c"}, Util.splitEnhanced('"a, b", c', ",", true))
assert.are.same({"a, b", "c"}, Util.splitEnhanced('" a, b", c', ",", true))
assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('a == "b", c', ",", true))
assert.are.same({"a == \"b\"", "c"}, Util.splitEnhanced('"a == ""b"" ", c', ",", true))
assert.are.same({"a", "b, c"}, Util.splitEnhanced('a, "b, c"', ",", true))

assert.has_error(function () Util.splitEnhanced('a, "b, c" ', ",", true) end, "Quoted fields cannot have extra characters outside double quotes.")
assert.has_error(function () Util.splitEnhanced('"a, b" hello, c', ",", true) end, "Quoted fields cannot have extra characters outside double quotes.")
assert.has_error(function () Util.splitEnhanced('a, b, "c ') end, "Unmatched quotes.")
end)

it("test isInstance", function()
local parent = {}
parent.__index = parent
Expand Down

0 comments on commit f40b2ea

Please sign in to comment.