-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwordcount.lua
123 lines (101 loc) · 2.88 KB
/
wordcount.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
--[[
Counts words in a document
Images and tables are ignored; words in the text body do not include the reference section
This filter is an adapted mixture of
https://github.com/pandoc/lua-filters/blob/master/wordcount/wordcount.lua
and
https://github.com/pandoc/lua-filters/blob/master/section-refs/section-refs.lua
]]
-- Running total of words found in the main text; incremented by the
-- handlers in body_count.
local body_words = 0
-- Running total of words found in the reference section; incremented by
-- the handler in ref_count.
local ref_words = 0
--- Report whether a block is a table.
-- @param blk element exposing its type tag in field `t`
-- @return true when `blk.t` is "Table", false otherwise
function is_table (blk)
  local tag = blk.t
  return tag == "Table"
end
--- Report whether a block is an image.
-- @param blk element exposing its type tag in field `t`
-- @return true when `blk.t` is "Image", false otherwise
function is_image (blk)
  if blk.t == "Image" then
    return true
  end
  return false
end
--- Build a new list with every element of `blks` that is neither a table
-- nor an image (as judged by is_table / is_image). `blks` is not modified.
--
-- Only the elements of `blks` themselves are tested; nothing nested inside
-- a kept element is examined here.
-- NOTE(review): if images only ever occur nested inside other blocks, the
-- is_image test never fires at this level -- confirm against the pandoc AST.
--
-- @param blks sequence of blocks
-- @return new sequence of the kept blocks, in their original order
function remove_all_tables_images (blks)
  local kept = {}
  -- ipairs rather than pairs: pairs gives no ordering guarantee, and the
  -- result must keep the blocks in document order.
  for _, blk in ipairs(blks) do
    if not (is_table(blk) or is_image(blk)) then
      kept[#kept + 1] = blk
    end
  end
  return kept
end
--- Report whether a block is the Div holding the references, i.e. a
-- "Div" whose identifier is exactly "refs".
-- @param blk element with fields `t` and `identifier`
-- @return true for a Div with identifier "refs", false otherwise
function is_ref_div (blk)
  if blk.t ~= "Div" then
    return false
  end
  return blk.identifier == "refs"
end
--- Report whether a block is the heading of the reference section.
--
-- A "Header" qualifies when its identifier is "references", or when it
-- equals the slug derived from the global `refs_title` (set by Pandoc()
-- from the `reference-section-title` metadata field): the title text in
-- lower case with each space replaced by a hyphen.
--
-- The whole title is stringified, so multi-word titles ("Works Cited" ->
-- "works-cited") and titles supplied as plain strings are handled; reading
-- only the first inline would match just the first word.
-- NOTE(review): the slug is a simple approximation of pandoc's automatic
-- header identifiers (punctuation is not stripped) -- confirm it is enough
-- for the titles in use.
--
-- @param blk element with fields `t` and `identifier`
-- @return true when `blk` is the reference-section header, false otherwise
function is_ref_header (blk)
  if blk.t ~= "Header" then
    return false
  end
  if blk.identifier == "references" then
    return true
  end
  if not refs_title then
    return false
  end
  -- gsub returns (string, count); assigning to a single local keeps only
  -- the string.
  local slug = pandoc.utils.stringify(refs_title):lower():gsub(" ", "-")
  return blk.identifier == slug
end
--- Collect every element of `blks` that is_ref_div accepts.
-- Only the elements of `blks` themselves are tested; `blks` is not modified.
-- @param blks sequence of blocks
-- @return new sequence of the matching blocks, in their original order
function get_all_refs (blks)
  local found = {}
  -- ipairs rather than pairs: pairs gives no ordering guarantee, and the
  -- result must keep the blocks in document order.
  for _, blk in ipairs(blks) do
    if is_ref_div(blk) then
      found[#found + 1] = blk
    end
  end
  return found
end
--- Build a new list with every element of `blks` that is neither the
-- references Div (is_ref_div) nor the reference-section header
-- (is_ref_header). `blks` is not modified.
-- @param blks sequence of blocks
-- @return new sequence of the kept blocks, in their original order
function remove_all_refs (blks)
  local kept = {}
  -- ipairs rather than pairs: pairs gives no ordering guarantee, and the
  -- result must keep the blocks in document order.
  for _, blk in ipairs(blks) do
    if not (is_ref_div(blk) or is_ref_header(blk)) then
      kept[#kept + 1] = blk
    end
  end
  return kept
end
-- Adds the number of whitespace-separated tokens in el.text to body_words.
local function add_code_words (el)
  -- gsub's second result is the number of matches; both results are kept
  -- local so no global `_` / `n` is created as a side effect.
  local _, n = el.text:gsub("%S+", "")
  body_words = body_words + n
end

-- Filter table used to count the words of the main text into body_words.
--   Str       counts as one word unless it is entirely punctuation
--   Code      contributes one word per whitespace-separated token
--   CodeBlock contributes one word per whitespace-separated token
body_count = {
  Str = function(el)
    -- we don't count a word if it's entirely punctuation:
    if el.text:match("%P") then
      body_words = body_words + 1
    end
  end,

  Code = add_code_words,

  CodeBlock = add_code_words
}
-- Filter table used to count the words of the reference section: every
-- Str containing at least one non-punctuation character adds one to
-- ref_words.
ref_count = {
  Str = function(el)
    -- a string made up purely of punctuation is not a word
    local is_word = el.text:find("%P") ~= nil
    if is_word then
      ref_words = ref_words + 1
    end
  end
}
--- Filter entry point: counts the words of the document and prints a
-- short report (total, text body, reference section) with print().
--
-- Top-level tables and images are dropped first. The remaining blocks are
-- then split into the reference section (blocks accepted by is_ref_div)
-- and the text body (everything that is neither a reference Div nor the
-- reference header); each part is walked with its own counting filter.
--
-- @param el the document; its `blocks` and `meta` fields are read
-- @return `el`, unchanged
function Pandoc(el)
  -- PANDOC_VERSION is unset on pandoc < 2.1, which this filter cannot use
  if PANDOC_VERSION == nil then
    io.stderr:write("WARNING: pandoc >= 2.1 required for wordcount filter\n")
    return el
  end

  -- Deliberately global: is_ref_header reads refs_title, so it has to be
  -- set before remove_all_refs runs.
  refs_title = el.meta["reference-section-title"]

  local content = remove_all_tables_images(el.blocks)

  -- Text body: everything except the reference Div and its header.
  local body_blocks = remove_all_refs(content)
  pandoc.walk_block(pandoc.Div(body_blocks), body_count)
  local body_line = body_words .. " words in text body"

  -- Reference section: only the reference Divs.
  local ref_blocks = get_all_refs(content)
  pandoc.walk_block(pandoc.Div(ref_blocks), ref_count)
  local ref_line = ref_words .. " words in reference section"

  local total_line = (body_words + ref_words) .. " total words"

  -- The rule under the total is as wide as the longest report line.
  local width = math.max(#body_line, #ref_line, #total_line)

  print(total_line)
  print(("-"):rep(width))
  print(body_line)
  print(ref_line)
  print()

  return el
end