Products
96SEO 2025-07-19 16:24 3
一、编写采集入库脚本接口
新建2个php文件:
1. 目录./api/下新建caiji.php,代码:
<?php
/**
 * Data-collection (caiji) entry script.
 *
 * Defines the IS_API and SELF constants from this file's own name and then
 * hands control to the main index.php one directory above this file.
 */
define('IS_API', basename(__FILE__, '.php')); // project identifier: this file's name without the ".php" suffix
define('SELF', pathinfo(__FILE__, PATHINFO_BASENAME)); // this file's name, extension included
// Load the main entry file. Anchored on __DIR__ so the path is resolved relative
// to this file instead of the current working directory.
require __DIR__.'/../index.php';
2. 在目录 ./dayrui/My/Api/ 下(如果该目录不存在,请先新建)新建 caiji.php,代码如下:
<?php
/**
 * Collection ("caiji") API script for the `news` module.
 *
 * Two modes, selected by the `action` query parameter:
 *  - `action=category`: prints every category with `child == 0` and `tid == 1`
 *    as `<h1>name<=>id</h1>`, one per line.
 *  - anything else: treats the request parameters as one article, sorts them into
 *    main-table and attached-table columns, downloads remote images referenced in
 *    the content, and saves the article. Prints 成功 / 失败 / 重复 / 栏目为空 / 标题为空.
 *
 * The script always terminates the request with exit.
 *
 * NOTE(review): nothing in this script authenticates the caller, so any client that
 * can reach the URL can publish content as uid 1. Consider requiring a shared secret
 * before exposing it publicly (not added here because it would break existing
 * publisher configurations).
 */
$this->_module_init('news'); // "news" is the module directory

if (($_GET['action'] ?? '') === 'category') {
    // List categories
    foreach ($this->module['category'] as $t) {
        if ($t['child'] == 0 && $t['tid'] == 1) {
            echo '<h1>'.$t['name'].'<=>'.$t['id'].'</h1>'.PHP_EOL;
        }
    }
} else {
    // Store the submitted article
    $data = $_REQUEST;
    // Publisher id 1
    $data['uid'] = 1;
    // Publisher account "admin"
    $data['author'] = 'admin';

    // Reads the cached column list of one module table; a missing or non-array
    // cache entry becomes an empty list so the merges below cannot fail.
    $columns = function (string $suffix): array {
        $cols = $this->get_cache(
            'table-'.SITE_ID,
            $this->content_model->dbprefix(SITE_ID.'_'.MOD_DIR.$suffix)
        );
        return is_array($cols) ? $cols : [];
    };

    // Key 1 = main-table columns, key 0 = attached-table columns, duplicates removed.
    $fields = [
        1 => array_unique(array_merge($columns(''), $columns('_category_data'))),
        0 => array_unique(array_merge($columns('_data_0'), $columns('_category_data_0'))),
    ];

    // Sort the submitted values into main-table / attached-table rows.
    $save = [];
    foreach ($fields as $ismain => $names) {
        foreach ($names as $name) {
            if (isset($data[$name])) {
                $save[$ismain][$name] = $data[$name];
            }
        }
    }

    if (empty($data['catid'])) {
        exit('栏目为空');
    }

    // A missing or blank title cannot be de-duplicated or published meaningfully.
    $title = $save[1]['title'] ?? '';
    if (!is_string($title) || trim($title) === '') {
        exit('标题为空');
    }

    /* Optional: publish to a random category; the numbers below are category ids
    $arr_1 = array("44","45","46","47","48","49","50","51","52","53","54","55","56","57","58","59","60","61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80","81","82","83","84","85");
    $randarr= mt_rand(0,count($arr_1)-1);
    $data['catid'] = $arr_1[$randarr];
    */

    $save[1]['uid'] = $save[0]['uid'] = $data['uid'];
    $save[1]['catid'] = $save[0]['catid'] = $data['catid'];
    $save[1]['url'] = '';
    $save[1]['status'] = 9; // 9 = published normally, 1 = waiting for review
    $save[1]['hits'] = random_int(50, 900);
    $save[1]['displayorder'] = 0;
    $save[1]['link_id'] = 0;
    //$save[1]['comments'] = 0;
    // $save[1]['avgsort'] = 0;
    // Input/update time is "now" plus a random offset of up to two hours.
    $save[1]['inputtime'] = $save[1]['updatetime'] = SYS_TIME + random_int(0, 7200);
    $save[1]['inputip'] = '127.0.0.1';

    // Guarded so that including this script twice cannot trigger a redeclare error.
    if (!function_exists('myTrim')) {
        /**
         * Removes spaces, line breaks and tabs from a string.
         *
         * The original search list contained what renders as a second plain space;
         * it was presumably a non-breaking or full-width space lost in copy/paste,
         * so both are stripped explicitly here.
         *
         * @param string $str Text to compact.
         * @return string The text without whitespace characters.
         */
        function myTrim($str)
        {
            return str_replace([' ', "\u{00A0}", "\u{3000}", "\n", "\r", "\t"], '', $str);
        }
    }

    // Article body; a missing or non-string value is treated as empty content.
    $value = isset($save[0]['content']) && is_string($save[0]['content']) ? $save[0]['content'] : '';

    // Description: tag-free, whitespace-free excerpt of at most 400 bytes. mb_strcut
    // never splits a multi-byte UTF-8 character, which plain substr can do.
    $nr = myTrim(dr_clearhtml($value));
    $excerpt = function_exists('mb_strcut') ? mb_strcut($nr, 0, 400, 'UTF-8') : substr($nr, 0, 400);
    $save[1]['description'] = dr_clearhtml($excerpt);

    // Prefix for root-relative image URLs; set it to the source site's domain when the
    // collected content uses relative addresses. With the default '/' this is a no-op.
    $formsite = '/';
    $value = str_replace('src="/', 'src="'.$formsite, $value);
    //exit($value);

    if (preg_match_all("/(src)=([\"|']?)([^ \"'>]+\.(gif|jpg|jpeg|png))\\2/i", $value, $imgs)) {
        // Site-domain whitelist. The original assigned the file *path* to a variable and
        // then tested isset() on that string, which can never match a host name.
        // Assumes the file returns an array keyed by domain — TODO confirm.
        $siteFile = WRITEPATH.'config/domain_site.php';
        $siteDomains = is_file($siteFile) ? require $siteFile : [];
        if (!is_array($siteDomains)) {
            $siteDomains = [];
        }
        // Remote attachment definitions; images already hosted on one of their URLs are skipped.
        $remote = \Phpcmf\Service::C()->get_cache('attachment');

        foreach ($imgs[3] as $img) {
            if (strpos($img, '/api/ueditor/') !== false
                || strpos($img, '/api/umeditor/') !== false) {
                continue;
            }
            // Only absolute http(s) URLs are downloaded.
            if (strpos($img, 'http') !== 0) {
                continue;
            }
            $domain = parse_url($img, PHP_URL_HOST);
            if (!$domain) {
                continue;
            }
            // Skip images on one of this installation's own site domains.
            if (isset($siteDomains[$domain])) {
                continue;
            }
            // Skip images that already live under the upload URL.
            if (strpos(SYS_UPLOAD_URL, $domain) !== false) {
                continue;
            }
            // Skip images hosted on a configured remote attachment URL.
            $isRemoteAttachment = false;
            if ($remote) {
                foreach ($remote as $t) {
                    if (!empty($t['url']) && strpos($t['url'], $domain) !== false) {
                        $isRemoteAttachment = true;
                        break;
                    }
                }
            }
            if ($isRemoteAttachment) {
                continue;
            }

            // Download the remote file synchronously. The original read the attachment id
            // from a leftover loop variable holding a list of column names, which
            // evaluates to 0; the id is therefore passed as 0 explicitly.
            $rt = \Phpcmf\Service::L('upload')->down_file([
                'url' => $img,
                'attachment' => \Phpcmf\Service::M('Attachment')->get_attach_info(0),
            ]);
            if (empty($rt['code'])) {
                continue;
            }
            $att = \Phpcmf\Service::M('Attachment')->save_data($rt['data'], 'ueditor_down_img');
            if (!empty($att['code'])) {
                // Archived successfully: point the content at the local copy.
                $value = str_replace($img, $rt['data']['url'], $value);
            }
        }
    }

    $save[0]['content'] = $value;

    // Use the first image found in the content as the thumbnail ('' when there is none).
    $imgs = dr_get_content_img($save[0]['content']);
    $save[1]['thumb'] = (string) ($imgs[0] ?? '');

    // Reject articles whose title already exists.
    if ($this->content_model->table(SITE_ID.'_'.MOD_DIR)->where('title', $save[1]['title'])->counts()) {
        echo '重复';exit;
    }

    // $rt = $this->content_model->save(0, $save);
    $rt = $this->content_model->save_content(0, $save);
    if (!empty($rt['code'])) {
        exit('成功');
    } else {
        exit('失败');
    }
}
exit;
二、火车采集器新建一个在线发布模块
依次设置:选择迅睿发布模块 → 网页编码选择 UTF-8 → 网站地址填写网站首页地址,然后测试获取栏目列表。
如果能成功获取到栏目列表,就说明接口配置成功。
要发布的标签自行测试。
Demand feedback