forked from admin/zeropost-engine
feat: P4 metrics collector + /api/metrics; P5 from-url generator (cheerio)
This commit is contained in:
@@ -111,4 +111,23 @@ router.post('/topics-ideas', async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// POST /api/generate/from-url — read a URL and write a post in the channel's style.
// Body: { channelId, url }. The caller is taken from the x-user-id header.
// Responds 400 on missing or malformed input, 404 when channelsSvc.getChannel
// returns nothing, and 500 with the error message on any other failure.
router.post('/from-url', async (req, res) => {
  try {
    const { channelId, url } = req.body;
    // Explicit radix so a header such as "0x1f" is not read as hexadecimal.
    const userId = Number.parseInt(req.headers['x-user-id'], 10) || null;
    if (!channelId || !url) {
      return res.status(400).json({ error: 'channelId and url required' });
    }

    // The URL is fetched server-side, so anything that is not a well-formed
    // http(s) URL is a client error and is rejected before any work is done.
    let target = null;
    if (typeof url === 'string') {
      try {
        target = new URL(url);
      } catch {
        target = null;
      }
    }
    if (!target || !['http:', 'https:'].includes(target.protocol)) {
      return res.status(400).json({ error: 'url must be a valid http(s) URL' });
    }

    const channel = await channelsSvc.getChannel(userId, channelId);
    if (!channel) return res.status(404).json({ error: 'Channel not found' });

    // Required lazily inside the handler, as the route did originally.
    const { generateFromUrl } = require('../services/fromUrl');
    const result = await generateFromUrl({ url, channelId, channel });
    res.json(result);
  } catch (err) {
    console.error('[Route] POST /generate/from-url', err);
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
module.exports = router;
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
/**
 * GET  /api/metrics/channel/:channelId    — channel statistics
 * POST /api/metrics/collect               — forced collection (admin/cron)
 * GET  /api/metrics/best-time/:channelId  — best publishing days/hours (post counts by weekday and hour)
 * GET  /api/metrics/user-posts/:channelId — metrics of the caller's user_posts
 */
|
||||
|
||||
const express = require('express');
|
||||
const router = express.Router();
|
||||
const { query } = require('../config/db');
|
||||
const { collectMetrics } = require('../services/metricsCollector');
|
||||
|
||||
// ── POST /api/metrics/collect — forced collection run ────────────────────────
// Runs collectMetrics() once and replies with { ok: true, ...its result }.
// Any thrown error is reported as HTTP 500 carrying the error message.
router.post('/collect', async (req, res) => {
  try {
    const summary = await collectMetrics();
    const payload = { ok: true, ...summary };
    res.json(payload);
  } catch (failure) {
    const body = { error: failure.message };
    res.status(500).json(body);
  }
});
|
||||
|
||||
// ── GET /api/metrics/channel/:channelId — channel summary ────────────────────
// Query string: ?days=N (default 30, capped at 3650).
// Responds with { channel_id, days, totals, by_status, top_posts, reaction_totals }.
// 400 when channelId is not a non-negative integer, 500 when a query fails.
router.get('/channel/:channelId', async (req, res) => {
  try {
    const { channelId } = req.params;
    // Validate up front: a non-numeric id would otherwise fail inside the
    // database as a 500 and serialize as `channel_id: null` in the response.
    if (!/^\d+$/.test(channelId)) {
      return res.status(400).json({ error: 'channelId must be an integer' });
    }

    // Missing, non-numeric, zero or negative values fall back to the default;
    // the cap keeps `since` a valid Date for absurdly large inputs.
    const MAX_DAYS = 3650;
    const requestedDays = Number.parseInt(req.query.days, 10);
    const days = requestedDays > 0 ? Math.min(requestedDays, MAX_DAYS) : 30;
    const since = new Date(Date.now() - days * 86400_000);
    const params = [channelId, since];

    // The four aggregates do not depend on each other, so run them concurrently.
    const [
      { rows: statusStats },
      { rows: topPosts },
      { rows: reactionTotals },
      { rows: totals },
    ] = await Promise.all([
      // Posts by status (filtered by created_at, unlike the other queries).
      query(`
        SELECT status, COUNT(*) as count
        FROM posts
        WHERE channel_id=$1 AND created_at > $2
        GROUP BY status
      `, params),

      // Top 5 posts by summed reaction count.
      query(`
        SELECT p.id, p.tg_message_id, p.published_at,
        p.reactions, p.forwards,
        left(p.content, 100) AS preview
        FROM posts p
        WHERE p.channel_id=$1
        AND p.published_at > $2
        AND p.reactions != '{}'
        ORDER BY (
        SELECT COALESCE(SUM(value::int), 0)
        FROM jsonb_each_text(p.reactions)
        ) DESC
        LIMIT 5
      `, params),

      // Reaction totals per emoji for the period (top 10).
      query(`
        SELECT key as emoji, SUM(value::int) as total
        FROM posts p, jsonb_each_text(p.reactions)
        WHERE p.channel_id=$1 AND p.published_at > $2
        GROUP BY key
        ORDER BY total DESC
        LIMIT 10
      `, params),

      // Overall publication counters for the period.
      query(`
        SELECT COUNT(*) as total_posts,
        COUNT(CASE WHEN tg_message_id IS NOT NULL THEN 1 END) as published_to_tg,
        SUM(COALESCE(forwards,0)) as total_forwards
        FROM posts
        WHERE channel_id=$1 AND published_at > $2
      `, params),
    ]);

    res.json({
      channel_id: Number.parseInt(channelId, 10),
      days,
      totals: totals[0],
      by_status: statusStats,
      top_posts: topPosts,
      reaction_totals: reactionTotals,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// ── GET /api/metrics/best-time/:channelId — best day/hour ────────────────────
// Query string: ?days=N (default 90, capped at 3650).
// Counts published posts per ISO weekday and per hour, both evaluated in the
// Europe/Moscow time zone, and responds with { channel_id, days, by_dow, by_hour }.
// 400 when channelId is not a non-negative integer, 500 when a query fails.
router.get('/best-time/:channelId', async (req, res) => {
  try {
    const { channelId } = req.params;
    // Validate up front: a non-numeric id would otherwise fail inside the
    // database as a 500 and serialize as `channel_id: null` in the response.
    if (!/^\d+$/.test(channelId)) {
      return res.status(400).json({ error: 'channelId must be an integer' });
    }

    // Missing, non-numeric, zero or negative values fall back to the default;
    // the cap keeps `since` a valid Date for absurdly large inputs.
    const MAX_DAYS = 3650;
    const requestedDays = Number.parseInt(req.query.days, 10);
    const days = requestedDays > 0 ? Math.min(requestedDays, MAX_DAYS) : 90;
    const since = new Date(Date.now() - days * 86400_000);
    const params = [channelId, since];

    // Both histograms are independent, so fetch them concurrently.
    const [{ rows: byDow }, { rows: byHour }] = await Promise.all([
      // Publications per ISO day of week (1 = Monday … 7 = Sunday).
      query(`
        SELECT EXTRACT(ISODOW FROM published_at AT TIME ZONE 'Europe/Moscow') AS dow,
        COUNT(*) as count
        FROM posts
        WHERE channel_id=$1 AND published_at > $2 AND status='published'
        GROUP BY dow
        ORDER BY dow
      `, params),

      // Publications per hour (Moscow time).
      query(`
        SELECT EXTRACT(HOUR FROM published_at AT TIME ZONE 'Europe/Moscow') AS hour,
        COUNT(*) as count
        FROM posts
        WHERE channel_id=$1 AND published_at > $2 AND status='published'
        GROUP BY hour
        ORDER BY hour
      `, params),
    ]);

    // Index 0 is unused because ISODOW is 1-based.
    const DOW_LABELS = ['', 'Пн', 'Вт', 'Ср', 'Чт', 'Пт', 'Сб', 'Вс'];

    res.json({
      channel_id: Number.parseInt(channelId, 10),
      days,
      // `dow` is normalized to a number so it has the same type as `hour`
      // and `count`, whatever representation the driver returns for EXTRACT.
      by_dow: byDow.map((r) => {
        const dow = Number.parseInt(r.dow, 10);
        return { dow, label: DOW_LABELS[dow] || '?', count: Number.parseInt(r.count, 10) };
      }),
      by_hour: byHour.map((r) => ({
        hour: Number.parseInt(r.hour, 10),
        count: Number.parseInt(r.count, 10),
      })),
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// ── GET /api/metrics/user-posts/:channelId — user_posts metrics ──────────────
// Requires the x-user-id header (401 otherwise). Query string: ?days=N (default 30).
// Responds with up to 50 of the caller's user_posts rows for the channel,
// newest first; any thrown error becomes HTTP 500 with the error message.
router.get('/user-posts/:channelId', async (req, res) => {
  try {
    const callerId = parseInt(req.headers['x-user-id']);
    if (!callerId) {
      return res.status(401).json({ error: 'Unauthorized' });
    }

    const channelId = req.params.channelId;
    const MS_PER_DAY = 86400_000;
    const days = parseInt(req.query.days) || 30;
    const windowStart = new Date(Date.now() - days * MS_PER_DAY);

    const sql = `
      SELECT id, tg_message_id, status, published_at,
      reactions, forwards, metrics_at,
      left(content, 120) AS preview, topic, image_url
      FROM user_posts
      WHERE channel_id=$1 AND user_id=$2
      AND published_at > $3
      ORDER BY published_at DESC
      LIMIT 50
    `;
    const outcome = await query(sql, [channelId, callerId, windowStart]);

    res.json(outcome.rows);
  } catch (failure) {
    res.status(500).json({ error: failure.message });
  }
});
|
||||
|
||||
module.exports = router;
|
||||
@@ -0,0 +1,195 @@
|
||||
/**
|
||||
* fromUrl.js — парсинг URL и генерация поста по содержимому страницы.
|
||||
*
|
||||
* Поддерживаемые источники:
|
||||
* 1. Любая веб-страница — cheerio, og-meta + основной текст
|
||||
* 2. YouTube — title + description (без транскрипта, yt-dlp не нужен)
|
||||
* 3. t.me публичный пост — текст сообщения
|
||||
*/
|
||||
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
const ai = require('./ai');
|
||||
const pb = require('./promptBuilder');
|
||||
|
||||
const FETCH_TIMEOUT = 12_000;
|
||||
const MAX_TEXT_LEN = 4000; // лимит текста для промта
|
||||
|
||||
// ── Парсеры ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * YouTube: builds a text summary from the page's title, description and,
 * when present in the embedded page data, the chapter titles.
 *
 * @param {string} url - Video page URL; fetched with axios.
 * @returns {Promise<{title: string, text: string, imageUrl: (string|null), source: 'youtube'}>}
 *   `text` is title, description and chapters joined by blank lines and cut
 *   to MAX_TEXT_LEN characters.
 */
async function parseYoutube(url) {
  const requestOptions = {
    timeout: FETCH_TIMEOUT,
    headers: { 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1)' },
    maxRedirects: 5,
  };
  const res = await axios.get(url, requestOptions);
  const $ = cheerio.load(res.data);
  const metaContent = (selector) => $(selector).attr('content');

  const title = metaContent('meta[name="title"]')
    || metaContent('meta[property="og:title"]')
    || $('title').text();

  const description = metaContent('meta[name="description"]')
    || metaContent('meta[property="og:description"]')
    || '';

  // Try to pull chapter titles out of the initial page data. This is best
  // effort: if the captured fragment is not valid JSON, chapters stay empty.
  let chapters = '';
  const chaptersMatch = res.data.match(/"chapters":\s*\[([^\]]{1,3000})\]/);
  if (chaptersMatch) {
    try {
      const entries = JSON.parse(`[${chaptersMatch[1]}]`);
      const titles = entries.map((entry) => entry.title?.simpleText || '');
      chapters = titles.filter(Boolean).join(', ');
    } catch {}
  }

  const imageUrl = metaContent('meta[property="og:image"]') || null;

  const chapterLine = chapters ? `Главы: ${chapters}` : '';
  const sections = [title, description, chapterLine].filter(Boolean);
  const text = sections.join('\n\n').slice(0, MAX_TEXT_LEN);

  return { title, text, imageUrl, source: 'youtube' };
}
|
||||
|
||||
/**
 * Parses a public t.me post through its embeddable widget page.
 *
 * @param {string} url - Absolute post URL, e.g. https://t.me/<channel>/<messageId>.
 * @returns {Promise<{title: string, text: string, imageUrl: (string|null), source: 'telegram'}>}
 *   `text` is the widget's message text (falling back to og:description),
 *   cut to MAX_TEXT_LEN characters.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 * @throws Any error raised by the axios request.
 */
async function parseTelegram(url) {
  // Build the embed URL from parsed parts: an existing query string is kept
  // verbatim and extended with "&", and a fragment is dropped, so the embed
  // flags always end up in the query that is actually sent to the server.
  const target = new URL(url);
  const embedFlags = 'embed=1&mode=tme';
  const search = target.search ? `${target.search}&${embedFlags}` : `?${embedFlags}`;
  const embedUrl = `${target.origin}${target.pathname}${search}`;

  const res = await axios.get(embedUrl, {
    timeout: FETCH_TIMEOUT,
    headers: { 'User-Agent': 'Mozilla/5.0' },
  });
  const $ = cheerio.load(res.data);

  const text = $('.tgme_widget_message_text').text().trim()
    || $('meta[property="og:description"]').attr('content')
    || '';

  const title = $('meta[property="og:title"]').attr('content') || 'Telegram пост';
  const imageUrl = $('meta[property="og:image"]').attr('content') || null;

  return { title, text: text.slice(0, MAX_TEXT_LEN), imageUrl, source: 'telegram' };
}
|
||||
|
||||
/**
 * Generic web page: extracts title, description, preview image and the main
 * body text.
 *
 * @param {string} url - Page URL; fetched with axios.
 * @returns {Promise<{title: string, text: string, imageUrl: (string|null), source: 'web'}>}
 *   `text` is the meta description followed by the collected body text,
 *   cut to MAX_TEXT_LEN characters.
 */
async function parseWeb(url) {
  const requestHeaders = {
    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
    'Accept-Language': 'ru-RU,ru;q=0.9,en;q=0.8',
  };
  const res = await axios.get(url, {
    timeout: FETCH_TIMEOUT,
    headers: requestHeaders,
    maxRedirects: 5,
  });

  const $ = cheerio.load(res.data);

  // Strip boilerplate first, so the selectors below only see what is left.
  $('script, style, nav, footer, header, aside, form, .cookie, .banner, .popup, .ad').remove();

  const metaContent = (selector) => $(selector).attr('content');

  const title = metaContent('meta[property="og:title"]')
    || metaContent('meta[name="title"]')
    || $('h1').first().text().trim()
    || $('title').text().trim();

  const description = metaContent('meta[property="og:description"]')
    || metaContent('meta[name="description"]')
    || '';

  const imageUrl = metaContent('meta[property="og:image"]') || null;

  // Main text: the first article-like container if there is one, else <body>.
  const container = $('article, main, .content, .post, .entry, [role="main"]').first();
  const root = container.length ? container : $('body');

  // Keep only fragments longer than 40 characters.
  const paragraphs = root
    .find('p, h2, h3, li')
    .toArray()
    .map((el) => $(el).text().trim())
    .filter((fragment) => fragment.length > 40);

  const bodyText = paragraphs.join('\n').slice(0, MAX_TEXT_LEN);
  const text = [description, bodyText].filter(Boolean).join('\n\n').slice(0, MAX_TEXT_LEN);

  return { title, text, imageUrl, source: 'web' };
}
|
||||
|
||||
// ── Роутер источников ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Picks the parser for a URL by its host and returns that parser's result.
 *
 * Hosts are matched exactly or as a sub-domain (`youtube.com`, `*.youtube.com`,
 * `youtu.be`, `*.youtu.be`, `t.me`), so a look-alike host such as
 * `notyoutube.com.evil.example` is handled as an ordinary web page.
 * Only http: and https: URLs are accepted.
 *
 * @param {string} url - Absolute URL to load.
 * @returns {Promise<object>} Whatever parseYoutube, parseTelegram or parseWeb returns.
 * @throws {Error} Any failure (invalid URL, unsupported scheme, parser error),
 *   wrapped with the "Не удалось загрузить страницу" prefix; the original
 *   error is attached as `cause`.
 */
async function parseUrl(url) {
  try {
    const u = new URL(url);
    if (u.protocol !== 'http:' && u.protocol !== 'https:') {
      throw new Error(`неподдерживаемый протокол ${u.protocol}`);
    }

    const host = u.hostname;
    const isDomainOrSubdomain = (domain) => host === domain || host.endsWith(`.${domain}`);

    if (isDomainOrSubdomain('youtube.com') || isDomainOrSubdomain('youtu.be')) {
      return await parseYoutube(url);
    }
    if (host === 't.me') {
      return await parseTelegram(url);
    }
    return await parseWeb(url);
  } catch (err) {
    throw new Error(`Не удалось загрузить страницу: ${err.message}`, { cause: err });
  }
}
|
||||
|
||||
// ── Генерация поста по распарсенному контенту ─────────────────────────────────
|
||||
|
||||
/**
 * Generates a channel-styled post from the content found at a URL.
 *
 * Steps: parse the page with parseUrl, build the system prompt with
 * pb.buildPostSystemPrompt(channel, ''), then ask the model configured as
 * config.ai.models.post to write the post.
 *
 * @param {object} args
 * @param {string} args.url - Page to read.
 * @param {*} args.channelId - Accepted for the caller's convenience; not used here.
 * @param {object} args.channel - Channel record handed to the prompt builder.
 * @returns {Promise<{content: *, title: *, imageUrl: *, source: *, usage: *}>}
 *   `content` and `usage` come from the ai.chat result, the rest from the parser.
 * @throws {Error} When url or channel is missing, when the page yields neither
 *   text nor title, or when parsing or generation fails.
 */
async function generateFromUrl({ url, channelId, channel }) {
  if (!url) throw new Error('url required');
  if (!channel) throw new Error('channel required');

  // 1. Parse the page.
  const parsed = await parseUrl(url);
  const hasContent = Boolean(parsed.text || parsed.title);
  if (!hasContent) {
    throw new Error('Не удалось извлечь текст со страницы');
  }

  // 2. Build the prompts.
  const channelContext = pb.buildPostSystemPrompt(channel, '');

  let sourceLabel;
  if (parsed.source === 'youtube') {
    sourceLabel = 'YouTube-видео';
  } else if (parsed.source === 'telegram') {
    sourceLabel = 'Telegram-пост';
  } else {
    sourceLabel = 'Веб-статья';
  }

  const userPrompt = [
    'На основе материала ниже напиши пост для Telegram-канала в стиле этого канала.',
    '',
    `ИСТОЧНИК: ${sourceLabel}`,
    `URL: ${url}`,
    '',
    'ЗАГОЛОВОК:',
    `${parsed.title || '—'}`,
    '',
    'СОДЕРЖАНИЕ:',
    `${parsed.text || '(текст не извлечён, опирайся на заголовок)'}`,
    '',
    '---',
    '',
    'ЗАДАЧА:',
    '— Напиши пост в стиле и голосе канала',
    '— Передай суть материала своими словами, не пересказывай дословно',
    '— Добавь свой угол зрения или вывод',
    '— Длина поста: 150–500 символов',
    '— Верни ТОЛЬКО текст поста, без пояснений',
  ].join('\n');

  // 3. Generate.
  const model = require('../config').ai.models.post;
  const generationOptions = { maxTokens: 1000, temperature: 0.85 };
  const result = await ai.chat(model, channelContext, userPrompt, generationOptions);

  const { title, imageUrl, source } = parsed;
  return {
    content: result.text,
    title,
    imageUrl,
    source,
    usage: result.usage,
  };
}
|
||||
|
||||
module.exports = { generateFromUrl, parseUrl };
|
||||
@@ -0,0 +1,162 @@
|
||||
/**
 * metricsCollector.js
 * Worker that collects metrics for published posts.
 *
 * Current state:
 *   - reactions — requested through getMessageReactionCount (Bot API 7+)
 *   - forwards  — intended to use getForwardCount (Bot API 8+, if available);
 *                 not collected yet, always stored as 0
 *   - views     — not directly available in the Bot API; left at 0 until MTProto
 *
 * Runs every 15 minutes via setInterval (started from index.js),
 * or manually: POST /api/metrics/collect
 */
|
||||
|
||||
const axios = require('axios');
|
||||
const { query } = require('../config/db');
|
||||
|
||||
const COLLECT_WINDOW_DAYS = 30; // собираем метрики для постов за последние N дней
|
||||
|
||||
/**
 * Resolves the Telegram API base URL.
 *
 * Reads the TELEGRAM_API_BASE row from app_settings and strips one trailing
 * slash. Falls back to https://api.telegram.org when the row is missing or
 * empty, or when the lookup throws.
 *
 * @returns {Promise<string>} Base URL without a trailing slash.
 */
async function getTgApiBase() {
  const fallbackBase = 'https://api.telegram.org';
  try {
    const { rows } = await query(`SELECT value FROM app_settings WHERE key='TELEGRAM_API_BASE'`);
    const configured = rows[0]?.value?.replace(/\/$/, '');
    return configured || fallbackBase;
  } catch {
    return fallbackBase;
  }
}
|
||||
|
||||
/**
 * Collects reaction counters for a single Telegram message.
 *
 * Resolves to { reactions: { [emojiOrCustomEmojiId]: count }, forwards: 0, views: 0 }.
 * `forwards` and `views` are never modified here and always stay 0.
 * When botToken, tgChannelId or tgMessageId is missing, the empty result is
 * returned without any request.
 *
 * Request failures are not thrown: an HTTP 400 is ignored silently (treated as
 * "reactions disabled or post not found"), anything else is logged with
 * console.warn, and the counters gathered so far are returned.
 *
 * NOTE(review): this calls a Bot API method named getMessageReactionCount —
 * confirm against the Bot API reference that such a method exists for the
 * targeted API version.
 *
 * @param {object} args
 * @param {string} args.botToken - Bot token placed into the request URL.
 * @param {*} args.tgChannelId - Sent as `chat_id`.
 * @param {*} args.tgMessageId - Sent as `message_id`.
 * @param {string} args.tgApiBase - API base URL without a trailing slash.
 * @returns {Promise<{reactions: Object<string, number>, forwards: number, views: number}>}
 */
async function collectForPost({ botToken, tgChannelId, tgMessageId, tgApiBase }) {
  const snapshot = { reactions: {}, forwards: 0, views: 0 };
  const hasTarget = botToken && tgChannelId && tgMessageId;
  if (!hasTarget) return snapshot;

  try {
    const endpoint = `${tgApiBase}/bot${botToken}/getMessageReactionCount`;
    const requestConfig = {
      params: { chat_id: tgChannelId, message_id: tgMessageId },
      timeout: 8000,
    };
    const res = await axios.get(endpoint, requestConfig);
    const payload = res.data;

    if (payload?.ok && Array.isArray(payload.result?.reactions)) {
      payload.result.reactions.forEach((item) => {
        // Key by emoji, else by custom emoji id, else by a '?' placeholder.
        const key = item.type?.emoji || item.type?.custom_emoji_id || '?';
        const previous = snapshot.reactions[key] || 0;
        snapshot.reactions[key] = previous + (item.count || 0);
      });
    }
  } catch (e) {
    // 400 = reactions are disabled or the post was not found — not critical.
    const isExpected = e.response?.status === 400;
    if (!isExpected) {
      console.warn('[Metrics] getMessageReactionCount error:', e.message);
    }
  }

  return snapshot;
}
|
||||
|
||||
/**
 * Main metrics collection pass.
 *
 * Walks the most recent published rows of `posts` (up to 100) and `user_posts`
 * (up to 50) from the last COLLECT_WINDOW_DAYS days that have a tg_message_id
 * and belong to a Telegram channel with a bot token. For each row it calls
 * collectForPost and stores the result as the row's latest snapshot. For
 * `posts` it also appends a post_metrics history row when there is at least
 * one reaction or forward.
 *
 * Rows are processed one at a time; a failure on one row is logged and does
 * not stop the pass.
 *
 * NOTE(review): collectForPost in this file returns an empty result when the
 * Telegram request fails, so a failed request overwrites previously stored
 * reactions with `{}` — confirm whether that is intended.
 *
 * @returns {Promise<{updated: number}>} Number of rows whose snapshot was written.
 */
async function collectMetrics() {
  const tgApiBase = await getTgApiBase();
  const since = new Date(Date.now() - COLLECT_WINDOW_DAYS * 86400_000);

  // System posts with a tg_message_id from the last N days.
  const { rows: posts } = await query(`
    SELECT p.id, p.tg_message_id, p.channel_id, p.published_at,
    c.bot_token, c.tg_channel_id
    FROM posts p
    JOIN channels c ON c.id = p.channel_id
    WHERE p.tg_message_id IS NOT NULL
    AND p.published_at > $1
    AND c.platform = 'telegram'
    AND c.bot_token IS NOT NULL
    ORDER BY p.published_at DESC
    LIMIT 100
  `, [since]);

  const updatedPosts = await refreshStoredMetrics(posts, 'post', tgApiBase, async (post, metrics) => {
    const totalReactions = Object.values(metrics.reactions).reduce((s, v) => s + v, 0);

    // Update posts with the latest snapshot.
    await query(`
      UPDATE posts
      SET reactions=$1, forwards=$2, metrics_at=NOW()
      WHERE id=$3
    `, [JSON.stringify(metrics.reactions), metrics.forwards, post.id]);

    // Write to the history table only when there is something to record.
    if (totalReactions > 0 || metrics.forwards > 0) {
      await query(`
        INSERT INTO post_metrics (post_id, captured_at, views, forwards, reactions)
        VALUES ($1, NOW(), $2, $3, $4)
      `, [post.id, metrics.views, metrics.forwards, JSON.stringify(metrics.reactions)]);
    }
  });

  // user_posts — user-created posts with a tg_message_id.
  const { rows: userPosts } = await query(`
    SELECT up.id, up.tg_message_id, up.channel_id,
    c.bot_token, c.tg_channel_id
    FROM user_posts up
    JOIN channels c ON c.id = up.channel_id
    WHERE up.tg_message_id IS NOT NULL
    AND up.published_at > $1
    AND c.platform = 'telegram'
    AND c.bot_token IS NOT NULL
    ORDER BY up.published_at DESC
    LIMIT 50
  `, [since]);

  const updatedUserPosts = await refreshStoredMetrics(userPosts, 'user_post', tgApiBase, async (post, metrics) => {
    await query(`
      UPDATE user_posts
      SET reactions=$1, forwards=$2, metrics_at=NOW()
      WHERE id=$3
    `, [JSON.stringify(metrics.reactions), metrics.forwards, post.id]);
  });

  const updated = updatedPosts + updatedUserPosts;
  console.log(`[Metrics] Collected for ${updated} posts`);
  return { updated };
}

/**
 * Private helper: collects metrics for each row sequentially and hands them to
 * `persist`; a row that throws is logged under `label` and skipped.
 *
 * @param {Array<object>} rows - Rows with id, bot_token, tg_channel_id, tg_message_id.
 * @param {string} label - Row kind used in the error log ('post' / 'user_post').
 * @param {string} tgApiBase - Telegram API base URL.
 * @param {function(object, object): Promise<void>} persist - Stores the metrics of one row.
 * @returns {Promise<number>} Number of rows persisted without an error.
 */
async function refreshStoredMetrics(rows, label, tgApiBase, persist) {
  let persisted = 0;
  for (const row of rows) {
    try {
      const metrics = await collectForPost({
        botToken: row.bot_token,
        tgChannelId: row.tg_channel_id,
        tgMessageId: row.tg_message_id,
        tgApiBase,
      });
      await persist(row, metrics);
      persisted++;
    } catch (e) {
      console.error(`[Metrics] ${label}`, row.id, 'error:', e.message);
    }
  }
  return persisted;
}
|
||||
|
||||
// Automatic collection every 15 minutes.
// Holds the interval handle so that repeated calls do not start a second timer.
let _timer = null;

/**
 * Starts periodic metrics collection: one run 30 seconds after the call and
 * then one every 15 minutes. Calling it again while the timer exists is a no-op.
 * Failures of individual runs are logged and never thrown.
 */
function startAutoCollect() {
  if (_timer) return;

  const intervalMs = 15 * 60 * 1000;
  const runAndLog = (prefix) => collectMetrics().catch((e) => console.error(prefix, e.message));

  _timer = setInterval(() => {
    runAndLog('[Metrics] auto-collect error:');
  }, intervalMs);

  // First run 30 seconds after startup.
  setTimeout(() => runAndLog('[Metrics] init error:'), 30_000);

  console.log('[Metrics] Auto-collect started (every 15 min)');
}
|
||||
|
||||
module.exports = { collectMetrics, startAutoCollect };
|
||||
Reference in New Issue
Block a user