Skip to content

Commit

Permalink
慕课课件
Browse files Browse the repository at this point in the history
  • Loading branch information
Benature committed Apr 6, 2020
1 parent 2cd7a28 commit 6735b9b
Show file tree
Hide file tree
Showing 5 changed files with 146 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@
- [课件网盘地址我懒得记--url_jump](./url_jump)
- [其他](./other)
- [16进制转2进制——计算误码率](./other/hex2bin.py)
- [考满分万能句型练习数据爬取](./other/KMF_write_sentence.ipynb)
- [爬虫](./spider)
- [考满分万能句型练习数据爬取](./spider/KMF_write_sentence.ipynb)
- [慕课课件下载](./spider/慕课课件下载.ipynb)
2 changes: 2 additions & 0 deletions lecture_rename/readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# 修改文件名中的序号位置

查看[推送](https://mp.weixin.qq.com/s/TGFkojxZoBCOPwDz101pIg)阅读更多

考虑文件的序号在末尾(或其他位置).

仅考虑了文件名中除了序号以外不含有其他数字.
Expand Down
File renamed without changes.
139 changes: 139 additions & 0 deletions spider/慕课课件下载.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-02T12:51:15.439691Z",
"start_time": "2020-04-02T12:51:15.436872Z"
}
},
"outputs": [],
"source": [
"import requests\n",
"from bs4 import BeautifulSoup as bs\n",
"from selenium import webdriver\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-02T12:48:15.750997Z",
"start_time": "2020-04-02T12:48:15.748665Z"
}
},
"outputs": [],
"source": [
"# Folder the downloaded PDFs are written into. Keep the trailing slash:\n",
"# the download cell builds each path by plain string concatenation.\n",
"dir_path = '电工学/'\n",
"# Course content page that the browser is pointed at before every click.\n",
"url = 'http://www.icourse163.org/learn/HIT-1001998009?tid=1002098003#/learn/content'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-02T13:01:23.895292Z",
"start_time": "2020-04-02T13:01:21.674177Z"
}
},
"outputs": [],
"source": [
"# Start one Chrome session; the download cell reuses it through the\n",
"# notebook-level name `driver`.\n",
"driver = webdriver.Chrome()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2020-04-02T13:41:01.478542Z",
"start_time": "2020-04-02T13:36:20.629957Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"from selenium.webdriver.common.by import By\n",
"\n",
"TITLE_CLASS = 'j-titleName'\n",
"LESSON_SELECTOR = '.u-learnLesson.normal.f-cb.f-pr'\n",
"\n",
"\n",
"def _open_title(index):\n",
"    \"\"\"Reload the course page, click title 0 and then title `index`.\n",
"\n",
"    Returns the resulting page parsed with BeautifulSoup. The page is\n",
"    reloaded on every call so each click sequence starts from the same state.\n",
"    NOTE(review): presumably the two clicks collapse the default-open chapter\n",
"    and expand the requested one -- confirm against the live page.\n",
"    \"\"\"\n",
"    driver.get(url)\n",
"    time.sleep(1)  # give the page time to finish loading\n",
"    driver.find_elements(By.CLASS_NAME, TITLE_CLASS)[0].click()\n",
"    # Look the titles up again after the first click instead of reusing the list.\n",
"    driver.find_elements(By.CLASS_NAME, TITLE_CLASS)[index].click()\n",
"    return bs(driver.page_source, 'lxml')\n",
"\n",
"\n",
"# Download the PDF attached to every lesson under each title entry, starting\n",
"# at entry `jump`.\n",
"if dir_path:\n",
"    os.makedirs(dir_path, exist_ok=True)  # open() cannot create the folder itself\n",
"\n",
"driver.get(url)  # open the course page\n",
"last_file = ''\n",
"jump = 5  # number of leading title entries to skip\n",
"title_count = len(driver.find_elements(By.CLASS_NAME, TITLE_CLASS))\n",
"for title_idx in range(jump, title_count):\n",
"    lesson_count = len(_open_title(title_idx).select(LESSON_SELECTOR))\n",
"    for j in range(lesson_count):\n",
"        print(title_idx - jump, j)\n",
"        html = _open_title(title_idx)\n",
"        driver.find_element(By.ID, html.select(LESSON_SELECTOR)[j]['id']).click()\n",
"        time.sleep(1)\n",
"        html = bs(driver.page_source, 'lxml')\n",
"        file_path = dir_path + html.select('.up.j-up.f-thide')[1].text.replace(' ', '') + '.pdf'\n",
"        if last_file == file_path:\n",
"            # Same name as the file saved just before: skip it.\n",
"            print('jump', j)\n",
"            continue\n",
"\n",
"        # Resolve the document tab for THIS lesson only, so a lesson without\n",
"        # one never reuses the tab id left over from the previous lesson.\n",
"        doc_tab_id = next(\n",
"            (tab['id'] for tab in html.select('li.f-fl') if '文档' in tab.get('title', '')),\n",
"            None,\n",
"        )\n",
"        if doc_tab_id is None:\n",
"            print('no document tab', j)\n",
"            continue\n",
"\n",
"        driver.find_element(By.ID, doc_tab_id).click()\n",
"        time.sleep(1)\n",
"        html = bs(driver.page_source, 'lxml')\n",
"        link = html.select_one('.j-downpdf')\n",
"        if link is None:\n",
"            print('no pdf link', j)\n",
"            continue\n",
"\n",
"        print(file_path)\n",
"        try:\n",
"            r = requests.get(link['href'], timeout=30)\n",
"            r.raise_for_status()\n",
"        except requests.RequestException as err:\n",
"            # Never store an HTTP error page under a .pdf name.\n",
"            print('download failed', file_path, err)\n",
"            continue\n",
"        with open(file_path, 'wb') as pdf_file:\n",
"            pdf_file.write(r.content)\n",
"        last_file = file_path"
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions url_jump/readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# 一个快速打开网盘链接的方法

查看[推送](https://mp.weixin.qq.com/s/LUO55UsikPF4Qo8IBCyDqg)阅读更多

## 说明

老师把课件放到百度网盘了, 又不想把网盘链接添加到浏览器书签.
Expand Down

0 comments on commit 6735b9b

Please sign in to comment.