-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch-trailers.pl
executable file
·185 lines (141 loc) · 6.32 KB
/
fetch-trailers.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
use JSON qw/decode_json/;
############ CONFIG
# Command line used to play a trailer; it is embedded verbatim in every generated button.
#TODO Figure out how to replace this with vlc..
# Also should we always just be QuickTime?
my $play_command = join ' ',
    '/usr/bin/mplayer',
    qw{-fs -zoom -quiet},
    qw{-user-agent QuickTime},
    qw{-cache-min 10},
    qw{-cache 16384};

# sprintf template for a single menu button: the first %s receives the button
# text, the second %s receives the stream URI handed to the player.
my $button_template = join '',
    "\t<button>\n",
    "\t\t<type>VIDEO_BROWSER</type>\n",
    "\t\t<text>%s</text>\n",
    "\t\t<action>EXEC $play_command '%s'</action>\n",
    "\t</button>\n";

# Directory the generated menu files are written to; the first command-line
# argument overrides the built-in default.
my $output_dir = defined $ARGV[0]
    ? $ARGV[0]
    : "/usr/local/share/mythtv/themes/defaultmenu";
############

# Shared HTTP client; it identifies itself with a desktop browser user-agent string.
my $ua = LWP::UserAgent->new;
$ua->agent( 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36' );
# Escape the five XML-reserved characters in $text in a single pass.
# Returns the escaped copy; the argument is not modified.
sub _xml_escape {
    my ( $text ) = @_;
    my %entity_for = ( '&' => 'amp', "'" => 'apos', '"' => 'quot', '<' => 'lt', '>' => 'gt' );
    $text =~ s/([&'"<>])/'&' . $entity_for{$1} . ';'/ge;
    return $text;
}

# Guess the resolution encoded in a trailer URI so candidates can be ranked.
# An explicit "<digits>p" marker (as in "..._h1080p.mov") wins; otherwise fall
# back to the first run of digits in the URI, and to 0 when there are none.
sub _resolution_of {
    my ( $uri ) = @_;
    return $1 if $uri =~ /(?<!\d)(\d{3,4})p(?![A-Za-z0-9])/i;
    return $1 if $uri =~ /(\d+)/;
    return 0;
}

# Scan $html for every link matched by $link_re (the URI must be capture
# group 1) and return the one with the highest guessed resolution.
# Returns undef when nothing matched.
sub _best_link {
    my ( $html, $link_re ) = @_;
    my @candidates;
    while( $html =~ /$link_re/g ) {
        my $uri = $1;
        push @candidates, [ $uri, _resolution_of( $uri ) ];
    }
    my( $best ) = sort { $b->[1] <=> $a->[1] } @candidates;
    return $best ? $best->[0] : undef;
}

# For each hd-trailers.net listing page: fetch the index, visit every linked
# movie page, pick a playable stream URI for it and emit one menu button.
# One menu file per listing page is written into $output_dir.
for my $trailer_page ( qw{page/1 coming-soon most-watched top-movies opening-this-week netflix-new-releases} ) {
    # 'page/1' contains a slash, so it gets a dedicated file name.
    my $menu_file = $trailer_page eq 'page/1'
        ? "hd-trailers-latest.xml"
        : "hd-trailers-$trailer_page.xml";
    my $hd_trailers_url = "http://www.hd-trailers.net/$trailer_page";

    my $resp = $ua->get( $hd_trailers_url );
    if( not $resp->is_success ) {
        die "Failed to fetch [$hd_trailers_url]: ", $resp->code, " -- ", $resp->decoded_content, "\n";
    }
    my $index_content = $resp->decoded_content // '';

    # The new menu we're producing. Double-quoted so the page name is actually interpolated.
    my $new_trailer_menu = qq{<mythmenu name="$trailer_page">\n};
    my $button_count     = 0;
    my %seen_movie;

    # Parse the HTML using a regex to avoid an actual parser dependency. We may go to hell for this.
    MOVIE:
    while( $index_content =~ m{href="(/movie/[^"]+)"}g ) {
        my $movie_url = $1;
        next MOVIE if $movie_url =~ /#autoplay/;    # Skip the 'duplicate' links for each movie.
        next MOVIE if $seen_movie{ $movie_url }++;  # Skip URLs we've already seen.

        my $abs_movie_url = URI->new_abs( $movie_url, $hd_trailers_url );
        my $movie_resp    = $ua->get( $abs_movie_url );
        if( not $movie_resp->is_success ) {
            warn "Failed to fetch [$abs_movie_url]: ", $movie_resp->code, " -- ", $movie_resp->decoded_content, "\n";
            next MOVIE;
        }

        # Progress output only when run interactively.
        if( -t STDOUT ) {
            print "Parsing $movie_url\n";
        }

        my $movie_content = $movie_resp->decoded_content // '';

        # Attempt to find a movie title.
        # These regexes are super fragile. If we're getting unnamed trailers it's probably because these stopped matching.
        # Possible future problems: using ' instead of " to quote attributes, content attribute comes before property attribute, attribute name changes..
        # NOTE(review): the title is taken verbatim from an HTML attribute and is inserted
        # into the menu without further escaping -- presumably it is already entity-encoded; confirm.
        my $title = "Unnamed Trailer";
        if(    $movie_content =~ m{<meta.*?property="og:title".*?content="([^"]+)"/>}i
            or $movie_content =~ m{<meta.*?name="twitter:title".*?content="([^"]+)"/>}i
        ) {
            $title = $1;
        }

        # Attempt to find a uri for this movie. The sources below are tried in order; the first one present wins.
        my $stream_uri;

        # Match a "http://www.hd-trailers.net/yahoo-redir.php?id=a99df691-58d6-31d0-863e-79ba29b97896&resolution=720" style url *somewhere* on the page.
        if( $movie_content =~ m{yahoo-redir\.php\?id=.*?([a-zA-Z0-9-]+)} ) {
            my $movie_id = $1;

            #TODO Figure out what plrs is supposed to be, heh.
            my $yql_query = "http://video.query.yahoo.com/v1/public/yql?callback=&q=SELECT * FROM yahoo.media.video.streams WHERE id='$movie_id' AND format='mp4' AND protocol='http' AND plrs='sdwpWXbKKUIgNzVhXSce__' AND region='US'&env=prod&format=json";
            my $yql_resp  = $ua->get( $yql_query );
            if( not $yql_resp->is_success ) {
                # Report the failed YQL response itself, not the movie page that led to it.
                warn "Failed to fetch yql for [$movie_id]: ", $yql_resp->code, " -- ", ( $yql_resp->decoded_content // '' ), "\n";
                next MOVIE;
            }

            my $yql_data = eval { decode_json $yql_resp->decoded_content };
            if( $@ or not $yql_data ) {
                warn "Failed to receive a usable response from yql!\nQuery: $yql_query\nErr: $@\nContent: ", $yql_resp->decoded_content, "\n";
                next MOVIE;
            }

            # The eval keeps an unexpectedly shaped response (e.g. a string where a
            # hash is expected) from killing the whole run under 'strict refs'.
            my $streams = eval { $yql_data->{query}{results}{mediaObj}[0]{streams} };
            if( ref $streams ne 'ARRAY' or not @$streams ) {
                warn "Failed to get any streams for [$movie_id]!\n";
                next MOVIE;
            }

            # Sort for highest bitrate; a missing bitrate counts as 0.
            my( $best_stream ) = sort { ( $b->{bitrate} // 0 ) <=> ( $a->{bitrate} // 0 ) } @$streams;
            $stream_uri = ( $best_stream->{host} // '' ) . ( $best_stream->{path} // '' );
        }
        # Check for apple movie trailers (only reached when no yahoo redirect link is on the page).
        #http://trailers.apple.com/movies/independent/frozen/frozen-tlr1_h1080p.mov
        #http://movietrailers.apple.com/movies/wb/thehobbit2/thehobbit2-sneakpeek_h1080p.mov
        elsif( $movie_content =~ m{http://(?:movie)?trailers\.apple\.com/movies/.+\.mov}i ) {
            $stream_uri = _best_link(
                $movie_content,
                qr{href="(http://(?:movie)?trailers\.apple\.com/movies/[^"]+\.mov)"}i,
            );
        }
        # Check for 'locally' mirrored trailers
        elsif( $movie_content =~ m{href="http://videos\.hd-trailers\.net/\w+}i ) {
            $stream_uri = _best_link(
                $movie_content,
                qr{href="(http://videos\.hd-trailers\.net/[^"]+\.[a-z0-9]{2,4})"}i,
            );
        }
        # 5min.com links: rank by the optional "_<digits>" suffix before ".mp4".
        elsif( $movie_content =~ m{href="http://avideos\.5min\.com[^"]+\.mp4"}i ) {
            my @trailer_uris;
            while( $movie_content =~ m{href="(http://avideos\.5min\.com[^"]+?(?:_(\d+))?\.mp4)"}ig ) {
                push @trailer_uris, [ $1, $2 // 0 ];
            }
            ( $stream_uri ) = map { $_->[0] } sort { $b->[1] <=> $a->[1] } @trailer_uris;
        }
        elsif( $movie_content =~ m{href="http://www\.youtube\.com/watch\?}i ) {
            #TODO Add support for youtube trailers!
            # Perhaps via youtube-dl?
            # mplayer -fs "$(youtube-dl '$stream_uri')" appears to work!
            # But will it work in an EXEC command?
            warn "Skipping $movie_url: youtube only.\n";
            next MOVIE;
        }
        else {
            warn "Failed to find any kind of trailer uri for [$movie_url]\n";
            next MOVIE;
        }

        # A branch above can match its detection regex and still yield no usable link.
        if( not defined $stream_uri or not length $stream_uri ) {
            warn "Failed to find any kind of trailer uri for [$movie_url]\n";
            next MOVIE;
        }

        # The URI ends up between single quotes in the EXEC action, so a literal
        # quote would terminate that quoting; percent-encode it instead.
        # NOTE(review): assumes the EXEC line is handed to a shell -- confirm against mythtv.
        $stream_uri =~ s/'/%27/g;

        # We're inserting this into XML but does mythtv actually care?
        $new_trailer_menu .= sprintf $button_template, $title, _xml_escape( $stream_uri );
        $button_count++;
    }

    $new_trailer_menu .= "</mythmenu>";

    # Only write the file if at least one button was generated, since we could
    # have failed to parse/fetch every single trailer.
    if( $button_count ) {
        my $output_file = "$output_dir/$menu_file";
        open my $fh, ">:encoding(UTF-8)", $output_file or die "Failed to open $output_file: $!\n";
        print {$fh} $new_trailer_menu or die "Failed to write $output_file: $!\n";
        # Buffered write errors only surface at close.
        close $fh or die "Failed to close $output_file: $!\n";
    }
    else {
        warn "I think we failed on every single trailer:\n $new_trailer_menu\n";
    }
}