-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathvl-download
executable file
·147 lines (121 loc) · 4.16 KB
/
vl-download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python
######################################################################
#
# vl-download.py : Download videos from videolectures.net
# Author : Mayank Juneja (mayankjuneja1[AT]gmail[DOT]com)
# Version : 0.2
#
######################################################################
import os
import re
import subprocess
import sys
import urllib2
from operator import itemgetter
from optparse import OptionParser

from BeautifulSoup import BeautifulSoup
# Description and version string reported by --help / --version.
desc = "videolectures.net video downloader"
vers = "0.2"

# Build the command-line interface.
parser = OptionParser(description=desc, version=vers)
parser.add_option("-u", "--url",
                  dest="url",
                  help="videolectures.net URL",
                  metavar="URL")
# An empty output name means "derive the name from the lecture metadata",
# so make that the declared default instead of patching up None afterwards.
parser.add_option("-o", "--outfile",
                  dest="outfile",
                  default="",
                  help="Output file Name",
                  metavar="outfile")

# Parse arguments.
(options, args) = parser.parse_args()

# --url is mandatory. parser.error() prints the usage message and exits by
# itself, so no explicit sys.exit() is needed after it.
if options.url is None:
    parser.error("--url option is required")

url = options.url
outfile = options.outfile
# HTTP headers sent with every request so that the script looks like a
# regular browser. Each value holds only the header *value*: the header name
# is the dictionary key and must not be repeated inside the value.
request_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Keep-Alive': '300',
    'Proxy-Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
}
# Build the address of the lecture's SMIL document: strip trailing slashes
# from the lecture URL and append the fixed metadata path.
url = url.rstrip("/")
metadata_url = url + "/video/1/smil.xml"

# Download the document, making sure the connection is released even when
# reading fails part-way.
req = urllib2.Request(metadata_url, None, headers=request_headers)
response = urllib2.urlopen(req)
try:
    html_page = response.read()
finally:
    response.close()
html_page = BeautifulSoup(html_page)

## Find the metadata (Author, Title)
# Each value is the 'content' attribute of the first matching <meta> tag,
# reduced to ASCII (non-ASCII characters are dropped); it stays an empty
# string when the tag is absent.
title_tags = html_page.findAll('meta', {'name': 'title'})
author_tags = html_page.findAll('meta', {'name': 'author'})
title = ''
author = ''
if title_tags:
    title = title_tags[0].get('content', '').encode('ascii', 'ignore')
if author_tags:
    author = author_tags[0].get('content', '').encode('ascii', 'ignore')
# Collect the attributes of every <video> element in the SMIL document.
# Missing numeric attributes default to -1, missing text attributes to ''.
videos = []
for tag in html_page.findAll("video"):
    videos.append({
        'id': int(tag.get('id', -1)),
        'proto': tag.get('proto', ''),
        'width': int(tag.get('width', -1)),
        'height': int(tag.get('height', -1)),
        'size': int(tag.get('size', -1)),
        'ext': tag.get('ext', ''),
        'type': tag.get('type', ''),
        'src': tag.get('src', ''),
        'streamer': tag.get('streamer', ''),
    })

# Without any <video> element there is nothing to download; stop with a
# readable message instead of letting max() fail on an empty sequence.
if not videos:
    sys.exit("No <video> entries found in %s" % metadata_url)

# Keep only the videos with the maximum size.
max_size = max(video['size'] for video in videos)
videos = [video for video in videos if video['size'] == max_size]

# Index the remaining videos by protocol. When several share a protocol the
# last one listed wins.
video_urls = {}
for video in videos:
    video_urls[video['proto']] = {
        'streamer': video['streamer'],
        'src': video['src'],
        'ext': video['ext'],
    }
# Derive a default output name from the lecture metadata when the user did
# not supply one.
if outfile == "":
    outfile = title.replace(" ", "_") + "-" + author.replace(" ", "_")
print("Downloading video:  %s" % outfile)

# Try for http
if 'http' in video_urls:
    v = video_urls['http']
    outfile = outfile + "." + v['ext']
    req = urllib2.Request(v['src'], None, headers=request_headers)
    u = urllib2.urlopen(req)
    try:
        # Content-Length may be absent (or zero); in that case report the
        # number of bytes received instead of a percentage.
        lengths = u.info().getheaders("Content-Length")
        file_size = int(lengths[0]) if lengths else 0
        file_size_dl = 0
        block_sz = 8192
        # 'with' guarantees the output file is closed even if the transfer
        # is interrupted.
        with open(outfile, 'wb') as f:
            while True:
                chunk = u.read(block_sz)
                if not chunk:
                    break
                file_size_dl += len(chunk)
                f.write(chunk)
                if file_size > 0:
                    status = "Status: [%3.2f%%]" % (file_size_dl * 100. / file_size)
                else:
                    status = "Status: [%d bytes]" % file_size_dl
                # Carriage return rewinds to the start of the line so the
                # status is redrawn in place.
                sys.stdout.write("\r" + status)
                sys.stdout.flush()
    finally:
        u.close()
    sys.stdout.write("\n")
# Try for rtmp
elif 'rtmp' in video_urls:
    v = video_urls['rtmp']
    outfile = outfile + "." + v['ext']
    rtmp_path = v['streamer']
    rtmp_url = v['src']
    # NOTE(review): 'src' is passed as -r and 'streamer' as -y, exactly as
    # before. rtmpdump documents -r as the server URL and -y as the playpath,
    # so confirm against a real SMIL document that these are not swapped.
    #
    # The values come from the remote document and from the command line, so
    # they are passed as an argument list (no shell) to rule out shell
    # injection through quotes or metacharacters.
    cmd = ["rtmpdump", "-r", rtmp_url, "-y", rtmp_path, "-o", outfile + ".flv"]
    print(" ".join(cmd))
    try:
        subprocess.call(cmd)
    except OSError as err:
        # Raised when the rtmpdump executable cannot be started.
        sys.exit("Could not run rtmpdump: %s" % err)
else:
    print("Could not find a supported protocol")