输入代码
<?php
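/**
* Crawls the latest Leiphone (leiphone.com) news page by page and stores it in the local database.
* Download the QueryList plugin yourself from the internet, then include the files below.
* @author :kevin
*/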
// Script bootstrap: lift the execution time limit, load the scraper library
// and the project configuration, then start the HTML output.
set_time_limit(0);
require 'core/QueryList.class.php';
// TODO: database configuration
// NOTE(review): the mysql_* calls further down presumably rely on a connection opened in this file — confirm.
require 'config/config.php';
// A header line needs "Name: value"; the previous string had no colon, so the
// charset was never actually declared to the client.
header("Content-Type: text/html; charset=utf-8");
echo "<pre>";
// Extraction rules for one entry of a list page; passed to QueryList::Query
// together with $rang. Each rule is array(selector, attribute[, filter, callback]).
$reg = array(
    'title' => array('.tit','text'),
    'link' => array('.word a:eq(0)','href'),
    'desc' => array('.des','text'),
    'icon' => array('.img>a:eq(1) img','data-original'),
    'icon_title' => array('.img a:eq(0)','text'),
    'author_icon' => array('.aut img','src'),
    'author_name' => array('.aut span','text'),
    'create_time' => array('.time span:eq(1)','text'),
    'comment_num' => array('.cmt>span','text'),
    'create_date' => array('.time span:eq(0)','text','',function($content){
        // Callback: turn the scraped "Y / m / d" text into "Y-m-d".
        // Splitting on a slash with optional surrounding whitespace tolerates
        // spacing differences in the markup.
        $text = trim($content);
        $parts = preg_split('#\s*/\s*#', $text);
        if(count($parts) < 3){
            // Unexpected format: return the trimmed text unchanged instead of
            // raising undefined-offset notices and producing "x--".
            return $text;
        }
        return $parts[0].'-'.$parts[1].'-'.$parts[2];
    }),
);
// Selector of the repeating list item the rules in $reg are applied to.
$rang = '.wrap ul li';
// Extraction rule for an article detail page.
// NOTE(review): the '-.lp-proCard' filter presumably strips that element from the HTML — verify against QueryList.
$regDetails = array(
    'content' => array('.lph-article-comView','html','-.lp-proCard',function($content){
        // Pass-through hook: the article HTML is stored as scraped.
        return $content;
    }),
);
// Crawl list pages 6..100. For every article found on a page, fetch its detail
// page and insert one row into the `leiphone_list` table.
// NOTE(review): starting at page 6 looks like a resumed run — confirm the intended range.
$table = "leiphone_list";
for($page=6;$page<=100;$page++){
    $URL = 'http://www.leiphone.com/page/'.$page.'#lph-pageList';
    $result = QueryList::Query($URL,$reg,$rang,'','UTF-8');
    $data = $result->jsonArr;
    if(!$data){
        // An empty result aborts the whole run, exactly as before.
        die(json_encode(array('msg'=>"抓取数据为空或数据抓取失败,网络请求超 时!")));
    }
    foreach($data as $val){
        // Same argument layout as the list-page call above: range, then the
        // empty fourth argument, then the encoding. Previously 'UTF-8' was
        // passed one slot too early.
        // NOTE(review): verify the slot meanings against QueryList::Query's signature.
        $detailsRes = QueryList::Query($val['link'],$regDetails,'','','UTF-8');
        $detailsData = $detailsRes->jsonArr;
        // The detail fetch can come back empty; store an empty body instead of
        // reading an undefined offset.
        $body = isset($detailsData[0]['content']) ? $detailsData[0]['content'] : '';

        // Column => value map; keeping both in one array guarantees the column
        // list and the value list stay aligned.
        $row = array(
            'title' => $val['title'],
            'link' => $val['link'],
            'desc' => $val['desc'],
            'icon' => $val['icon'],
            'icon_desc' => $val['icon_title'],
            'author_icon' => $val['author_icon'],
            'author_name' => $val['author_name'],
            'create_time' => $val['create_date'].' '.$val['create_time'],
            'comment_num' => $val['comment_num'],
            'body' => $body,
        );

        // Scraped text is untrusted and routinely contains quotes; escape every
        // value before it is placed inside the SQL string literal.
        $values = array();
        foreach($row as $column => $value){
            $values[] = mysql_real_escape_string((string)$value);
        }

        $sql = "insert into {$table} (`".implode("`,`", array_keys($row))."`)"
             . " values('".implode("','", $values)."')";
        if(!mysql_query($sql)){
            echo $val['title']." insert Fail !!!<br><hr><br>";
        }
    }
}
2024 - 快车库 - 我的知识库 重庆启连科技有限公司 渝ICP备16002641号-10
企客连连 表单助手 企服开发 榜单123