Skip to content

Commit

Permalink
feat: 书源可指定
Browse files Browse the repository at this point in the history
  • Loading branch information
Cansiny0320 committed May 27, 2021
1 parent 9a2db82 commit 8c0f512
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 10 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

🎉 开箱即用的高性能可配置小说爬虫 快速下载无广告 txt 文件

书源来自各种笔趣阁

## ✨ 特性

📂 批量下载
Expand Down Expand Up @@ -42,6 +44,14 @@ $ yarn spider [小说名]
$ yarn spider download
```

默认会自动选择最快的书源;若要指定书源,请使用 `--source` 参数:

```bash

$ yarn spider --source [url] [小说名]

```

## ❗ tips

支持批量下载 小说名之间用空格分开
Expand All @@ -54,7 +64,7 @@ $ yarn spider download

**如何配置来源网站**

按照如下格式修改 `config.ts` 中的 `source` 数组
按照如下格式修改 `config.ts` 中的 `source` 数组,无需关心书源顺序

具体可以参考已配置的网站

Expand Down
5 changes: 4 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
"main": "index.js",
"license": "MIT",
"scripts": {
"spider": "npx ts-node src/index.ts"
"spider": "npx ts-node src/index.ts",
"test": "npx ts-node src/test.ts"
},
"devDependencies": {
"@types/node": "^15.3.1",
"@types/optimist": "^0.0.29",
"@types/signale": "^1.4.1",
"ts-node": "^9.1.1",
"typescript": "^4.2.4"
Expand All @@ -17,6 +19,7 @@
"@types/cheerio": "^0.22.28",
"axios": "^0.21.1",
"cheerio": "^1.0.0-rc.9",
"optimist": "^0.6.1",
"signale": "^1.4.0"
}
}
21 changes: 16 additions & 5 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
import fs from "fs"
import optimist from "optimist"
import { IOptions } from "./interface"
const argv = optimist.argv

import { Spider } from "./spider"
import { getSpecSource } from "./utils"

const args = process.argv.slice(2)
const bookNames = argv._ as string[]
const url = argv.source as string

if (args[0] === "download") {
const options: IOptions = {}

if (url) {
options.source = getSpecSource(url)
}

if (bookNames[0] === "download") {
const download = fs.readFileSync("./download.txt", "utf-8").split("\r\n")
download.forEach(item => {
new Spider(item)
new Spider(item, options)
})
} else {
args.forEach(item => {
new Spider(item)
bookNames.forEach(item => {
new Spider(item, options)
})
}
4 changes: 4 additions & 0 deletions src/interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,7 @@ interface ISelector {
CONTENT_TITLE: string
BOOK_CONTENT: string
}

/**
 * Optional settings accepted by the `Spider` constructor.
 */
export interface IOptions {
// Book source to crawl. When omitted, `Spider.run` selects one itself via `getSource`.
source?: ISource
}
16 changes: 13 additions & 3 deletions src/spider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@ import cheerio from "cheerio"
import fs from "fs"

import { DOWNLOAD_PATH, RETRY_TIMES, source } from "./config"
import { IBook, IContent, IContentUrl, ISource } from "./interface"
import { IBook, IContent, IContentUrl, IOptions, ISource } from "./interface"
import { checkFileExist, genSearchUrl, getSource, logger } from "./utils"

export class Spider {
success: number
fail: number
total: number
source!: ISource
constructor(bookName: string) {
constructor(bookName: string, options?: IOptions) {
if (options) {
const { source } = options
if (source) {
this.source = source
}
}
this.success = 0
this.fail = 0
this.total = 0
Expand Down Expand Up @@ -153,7 +159,11 @@ export class Spider {

async run(bookName: string) {
try {
this.source = await getSource(source)
if (!this.source) {
this.source = await getSource(source)
} else {
logger.log(`爬取开始,本次指定书源:${this.source.Url}`)
}
axios.defaults.baseURL = this.source.Url
const bookUrl = await this.getBookUrl(bookName)
if (!bookUrl) {
Expand Down
3 changes: 3 additions & 0 deletions src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import axios from "axios"
import fs from "fs"
import { Signale } from "signale"
import { source } from "./config"

import { IQuery, ISource } from "./interface"

Expand Down Expand Up @@ -43,6 +44,8 @@ export const getSource = async (source: ISource[]) => {
}
}

/**
 * Looks up a configured book source by its base URL.
 *
 * The comparison is an exact string match against each entry's `Url` in the
 * `source` array imported from `./config`.
 *
 * @param url - Base URL of the wanted source, compared verbatim.
 * @returns The first configured source whose `Url` equals `url`, or
 * `undefined` when none matches.
 */
export const getSpecSource = (url: string) =>
  // `find` stops at the first hit and is typed as possibly `undefined`, unlike `filter(...)[0]`.
  source.find(candidate => candidate.Url === url)

export const checkFileExist = (path: fs.PathLike, onExist: () => void, onNotExist: () => void) => {
fs.access(path, fs.constants.F_OK, err => {
if (err) {
Expand Down
23 changes: 23 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
resolved "https://registry.nlark.com/@types/node/download/@types/node-15.3.1.tgz?cache=0&sync_timestamp=1621463831198&other_urls=https%3A%2F%2Fregistry.nlark.com%2F%40types%2Fnode%2Fdownload%2F%40types%2Fnode-15.3.1.tgz#23a06b87eedb524016616e886b116b8fdcb180af"
integrity sha1-I6Brh+7bUkAWYW6IaxFrj9yxgK8=

"@types/optimist@^0.0.29":
version "0.0.29"
resolved "https://registry.nlark.com/@types/optimist/download/@types/optimist-0.0.29.tgz#a8873580b3a84b69ac1e687323b15fbbeb90479a"
integrity sha1-qIc1gLOoS2msHmhzI7Ffu+uQR5o=

"@types/signale@^1.4.1":
version "1.4.1"
resolved "https://registry.nlark.com/@types/signale/download/@types/signale-1.4.1.tgz?cache=0&sync_timestamp=1621243058878&other_urls=https%3A%2F%2Fregistry.nlark.com%2F%40types%2Fsignale%2Fdownload%2F%40types%2Fsignale-1.4.1.tgz#6137a6fd7960b48703dd2793c5b795480368b246"
Expand Down Expand Up @@ -247,13 +252,26 @@ make-error@^1.1.1:
resolved "https://registry.npm.taobao.org/make-error/download/make-error-1.3.6.tgz#2eb2e37ea9b67c4891f684a1394799af484cf7a2"
integrity sha1-LrLjfqm2fEiR9oShOUeZr0hM96I=

minimist@~0.0.1:
version "0.0.10"
resolved "https://registry.nlark.com/minimist/download/minimist-0.0.10.tgz?cache=0&sync_timestamp=1618846813226&other_urls=https%3A%2F%2Fregistry.nlark.com%2Fminimist%2Fdownload%2Fminimist-0.0.10.tgz#de3f98543dbf96082be48ad1a0c7cda836301dcf"
integrity sha1-3j+YVD2/lggr5IrRoMfNqDYwHc8=

nth-check@^2.0.0:
version "2.0.0"
resolved "https://registry.nlark.com/nth-check/download/nth-check-2.0.0.tgz#1bb4f6dac70072fc313e8c9cd1417b5074c0a125"
integrity sha1-G7T22scAcvwxPoyc0UF7UHTAoSU=
dependencies:
boolbase "^1.0.0"

optimist@^0.6.1:
version "0.6.1"
resolved "https://registry.npm.taobao.org/optimist/download/optimist-0.6.1.tgz#da3ea74686fa21a19a111c326e90eb15a0196686"
integrity sha1-2j6nRob6IaGaERwybpDrFaAZZoY=
dependencies:
minimist "~0.0.1"
wordwrap "~0.0.2"

p-limit@^1.1.0:
version "1.3.0"
resolved "https://registry.nlark.com/p-limit/download/p-limit-1.3.0.tgz#b86bd5f0c25690911c7590fcbfc2010d54b3ccb8"
Expand Down Expand Up @@ -367,6 +385,11 @@ typescript@^4.2.4:
resolved "https://registry.nlark.com/typescript/download/typescript-4.2.4.tgz?cache=0&sync_timestamp=1621581792911&other_urls=https%3A%2F%2Fregistry.nlark.com%2Ftypescript%2Fdownload%2Ftypescript-4.2.4.tgz#8610b59747de028fda898a8aef0e103f156d0961"
integrity sha1-hhC1l0feAo/aiYqK7w4QPxVtCWE=

wordwrap@~0.0.2:
version "0.0.3"
resolved "https://registry.npm.taobao.org/wordwrap/download/wordwrap-0.0.3.tgz#a3d5da6cd5c0bc0008d37234bbaf1bed63059107"
integrity sha1-o9XabNXAvAAI03I0u68b7WMFkQc=

[email protected]:
version "3.1.1"
resolved "https://registry.npm.taobao.org/yn/download/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50"
Expand Down

0 comments on commit 8c0f512

Please sign in to comment.