-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbigpicvotes
executable file
·119 lines (93 loc) · 2.57 KB
/
bigpicvotes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env ruby
# Start Date: Wednesday November 19, 2008
# Current Version: 0.9
# Author: Joseph Pecoraro
# Contact: [email protected]
# Description: Analyze the user comments for top images
# because the commenters often put the numbers for
# the images they really liked.
require 'rubygems'
require 'hpricot'
require 'net/http'
require 'uri'
# Wrapper around the functionality: counts how often each image number is
# mentioned in the comments of a page and ranks the images by that count.
class BigPictureAnalyzer

  # url  - the address the page was fetched from (stored, not otherwise used here)
  # body - the raw HTML of the page; it is parsed with Hpricot here
  #        (assumes validation of the URL it was taken from occurred elsewhere)
  def initialize(url, body)
    @url = url
    @doc = Hpricot(body)
    @votes = nil
  end

  # Analyze the doc and build the ranking.
  #
  # n - when greater than 0 the top n images are printed once the
  #     analysis is done; with the default of 0 nothing is printed.
  #
  # Returns nil. Raises NoMethodError when the page has no 'h2' or no
  # 'div.bpBody a' element (the lookups below then yield nil).
  def analyze(n=0)

    # The title
    @title = (@doc/'h2')[0].innerText

    # The number in the text of the last image link is taken as the
    # number of images (0 when that text holds no digits)
    max = (@doc/'div.bpBody a')[-1].innerText.match(/\d+/).to_s.to_i

    # counts[i] holds the mentions of image i. Slot 0 exists only so the
    # image number can be used directly as the index; it is never ranked.
    counts = Array.new(max + 1, 0)

    # Search Comments: URLs are blanked out first so digits inside them
    # are not mistaken for votes, then every remaining run of digits that
    # is a valid image number counts as one vote.
    (@doc/'.commentBodyText').each do |com|
      com.to_s.gsub(/https?:.*?(\s|[<>])/i, 'zz').scan(/\d+/) do |num|
        num = num.to_i
        counts[num] += 1 if num > 0 && num <= max
      end
    end

    # Rank images 1..max as [count, image] pairs: most votes first, ties
    # broken by the lower image number so the order is deterministic.
    ranked = []
    1.upto(max) { |img| ranked << [counts[img], img] }
    @votes = ranked.sort_by { |cnt, img| [-cnt, img] }

    # Possibly print the results
    print_top_n(n) if n > 0
  end

  # Print the header and the top n images to standard output.
  #
  # n - how many images to list; values <= 0 fall back to 10 and values
  #     larger than the number of images list every image.
  #
  # Runs the analysis first if it has not been run yet. Returns nil.
  def print_top_n(n=10)

    # Make sure there is a ranking to print
    analyze if @votes.nil?

    # Error handling
    n = 10 if n <= 0

    # Print header and top n
    puts
    puts "----------------------------"
    puts @title
    puts "Top Images Based on Comments"
    puts "----------------------------\n\n"

    # first(n) simply returns the whole ranking when n exceeds its length
    @votes.first(n).each_with_index do |(cnt, img), i|
      puts " %2d: Image #%-2d - %d votes" % [i+1, img, cnt]
    end
    puts
  end

end
# When run as a script
if $0 == __FILE__

  # Expect at least one command line arg (1 to 2 args)
  unless (1..2) === ARGV.size
    program_name = $0.split(/\//).last
    puts "usage: #{program_name} url [top#]"
    exit 1
  end

  # Command line args. String#to_i and nil.to_i both return 0 for a
  # missing or non-numeric count, so a single check supplies the default.
  url = ARGV[0]
  n = ARGV[1].to_i
  n = 10 if n <= 0

  # Everything that can fail because of a bad URL happens inside this
  # block, so a malformed URL, a URL without a page name, a network error
  # or an unexpected page all end with the same message and exit code 2.
  begin

    # Turn the url into a 'comments_' url if needed
    uri = URI.parse(url)
    parts = uri.path.to_s.split(/\//)
    last = parts.last
    raise ArgumentError, 'URL has no page name' if last.nil?
    unless last =~ /^comments_/
      parts[ parts.length-1 ] = 'comments_' + last
      uri.path = parts.join('/')
    end

    # Validate that the URI exists, and parse it if it does
    res = Net::HTTP.get_response uri
    raise "unexpected HTTP response #{res.code}" unless res.is_a?(Net::HTTPSuccess)
    BigPictureAnalyzer.new(url, res.body).analyze(n)

  rescue StandardError
    puts "Page did not exist. Please correct the URL"
    exit 2
  end

end