Skip to content

Commit

Permalink
增加13384美女图采集器
Browse files (browse the repository at this point in the history)
  • Loading branch information
YangZetao authored and committed on Sep 25, 2016
1 parent 6f4e149 commit dc306a0
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 4 deletions.
32 changes: 29 additions & 3 deletions demo/13384.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
/* 不要删除这段注释 */

$configs = array(
'name' => '13384美女图',
'domains' => array(
'www.13384.com'
),
Expand Down Expand Up @@ -54,10 +55,30 @@
//'table' => 'content',
//),
'fields' => array(
// 标题
array(
'name' => "name",
'selector' => "//div[@id='Article']//h1",
'required' => true,
),
// 分类
array(
'name' => "category",
'selector' => "//div[contains(@class,'crumbs')]//span//a",
'required' => true,
),
// 发布时间
array(
'name' => "addtime",
'selector' => "//p[contains(@class,'sub-info')]//span",
'required' => true,
),
// 内容
array(
'name' => "contents",
'selector' => "//div[@id='pages']//a//@href",
'repeated' => true,
'required' => true,
'children' => array(
array(
// 抽取出其他分页的url待用
Expand All @@ -71,7 +92,7 @@
// attached_url 使用了上面抓取的 content_page_url
'source_type' => 'attached_url',
'attached_url' => 'content_page_url',
'selector' => "//*[@id='big-pic']"
'selector' => "//*[@id='big-pic']//a//img"
),
),
),
Expand All @@ -82,16 +103,21 @@

$spider->on_extract_field = function($fieldname, $data, $page)
{
if ($fieldname == 'contents')
if ($fieldname == 'addtime')
{
$data = substr($data, 0, 19);
}
elseif ($fieldname == 'contents')
{
if (!empty($data))
{
$contents = $data;
$data = "";
foreach ($contents as $content)
{
$data .= $content['page_content'];
$data .= ", ".$content['page_content'];
}
echo "\n\n".$data."\n\n";
}
}
return $data;
Expand Down
1 change: 0 additions & 1 deletion demo/attachment_download.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

$spider = new phpspider($configs);


$spider->on_start = function($phpspider)
{
$url = "http://ocnt0imhl.bkt.clouddn.com/imgs/1637/2015-07/k306n1wzvkq669nm.jpg";
Expand Down
1 change: 1 addition & 0 deletions demo/qiushibaike.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
/* 不要删除这段注释 */

$configs = array(
'name' => '糗事百科',
'domains' => array(
'qiushibaike.com',
'www.qiushibaike.com'
Expand Down

0 comments on commit dc306a0

Please sign in to comment.