diff --git a/index.js b/index.js index 11232c6..64140dc 100644 --- a/index.js +++ b/index.js @@ -18,9 +18,12 @@ const photoSearchRoutes = require('./src/routes/photo-search'); const scheduledPostsRoutes = require('./src/routes/scheduledPosts'); const channelStatsRoutes = require('./src/routes/channelStats'); const calendarRoutes = require('./src/routes/calendar'); +const metricsRoutes = require('./src/routes/metrics'); // Start queue worker require('./src/workers/generation'); +// Metrics collector +require('./src/services/metricsCollector').startAutoCollect(); const app = express(); app.use(express.json()); @@ -58,6 +61,7 @@ app.use('/api/photo-search', photoSearchRoutes); app.use('/api/scheduled-posts', scheduledPostsRoutes); app.use('/api/channel-stats', channelStatsRoutes); app.use('/api/calendar', calendarRoutes); +app.use('/api/metrics', metricsRoutes); app.get('/health', (req, res) => { res.json({ ok: true, service: 'zeropost-engine', time: new Date() }); diff --git a/package-lock.json b/package-lock.json index 895b505..fcffe59 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": { "axios": "^1.16.1", "bull": "^4.16.5", + "cheerio": "^1.2.0", "dotenv": "^17.4.2", "express": "^5.2.1", "fast-xml-parser": "^4.5.6", @@ -646,6 +647,12 @@ "url": "https://opencollective.com/express" } }, + "node_modules/boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==", + "license": "ISC" + }, "node_modules/bull": { "version": "4.16.5", "resolved": "https://registry.npmjs.org/bull/-/bull-4.16.5.tgz", @@ -702,6 +709,48 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/cheerio": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", + "integrity": 
"sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.1.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.19.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/cluster-key-slot": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.1.tgz", @@ -775,6 +824,34 @@ "node": ">=12.0.0" } }, + "node_modules/css-select": { + "version": "5.2.2", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.2.2.tgz", + "integrity": "sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, + "node_modules/css-what": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-6.2.2.tgz", + "integrity": 
"sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">= 6" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -828,6 +905,61 @@ "node": ">=8" } }, + "node_modules/dom-serializer": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" + } + }, + "node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "BSD-2-Clause" + }, + "node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "license": "BSD-2-Clause", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } + }, + "node_modules/domutils": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.2.2.tgz", + "integrity": "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==", + "license": "BSD-2-Clause", + "dependencies": { + "dom-serializer": "^2.0.0", + 
"domelementtype": "^2.3.0", + "domhandler": "^5.0.3" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, "node_modules/dotenv": { "version": "17.4.2", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-17.4.2.tgz", @@ -869,6 +1001,43 @@ "node": ">= 0.8" } }, + "node_modules/encoding-sniffer": { + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/encoding-sniffer/-/encoding-sniffer-0.2.1.tgz", + "integrity": "sha512-5gvq20T6vfpekVtqrYQsSCFZ1wEg5+wW0/QaZMWkFr6BqD3NfKs0rLCx4rrVlSWJeZb5NBJgVLswK/w2MWU+Gw==", + "license": "MIT", + "dependencies": { + "iconv-lite": "^0.6.3", + "whatwg-encoding": "^3.1.1" + }, + "funding": { + "url": "https://github.com/fb55/encoding-sniffer?sponsor=1" + } + }, + "node_modules/encoding-sniffer/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -1195,6 +1364,37 @@ "node": ">= 0.4" } }, + "node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "funding": [ + 
"https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, + "node_modules/htmlparser2/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/http-errors": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", @@ -1427,6 +1627,18 @@ "node-gyp-build-optional-packages-test": "build-test.js" } }, + "node_modules/nth-check": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-2.1.1.tgz", + "integrity": "sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==", + "license": "BSD-2-Clause", + "dependencies": { + "boolbase": "^1.0.0" + }, + "funding": { + "url": "https://github.com/fb55/nth-check?sponsor=1" + } + }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", @@ -1460,6 +1672,55 @@ "wrappy": "1" } }, + "node_modules/parse5": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.3.0.tgz", + "integrity": "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==", + "license": "MIT", + "dependencies": { + "entities": "^6.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-htmlparser2-tree-adapter": { + "version": "7.1.0", + "resolved": 
"https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-7.1.0.tgz", + "integrity": "sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==", + "license": "MIT", + "dependencies": { + "domhandler": "^5.0.3", + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5-parser-stream": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5-parser-stream/-/parse5-parser-stream-7.1.2.tgz", + "integrity": "sha512-JyeQc9iwFLn5TbvvqACIF/VXG6abODeB3Fwmv/TGdLk2LfbWkaySGY72at4+Ty7EkPZj854u4CrICqNk2qIbow==", + "license": "MIT", + "dependencies": { + "parse5": "^7.0.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, + "node_modules/parse5/node_modules/entities": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-6.0.1.tgz", + "integrity": "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/parseurl": { "version": "1.3.3", "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", @@ -1973,6 +2234,15 @@ "url": "https://opencollective.com/express" } }, + "node_modules/undici": { + "version": "7.27.2", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.27.2.tgz", + "integrity": "sha512-uZsKNuzQxDMUY6M3pIMvy5tvlGmtq8XJ2oLAkfRKGNu+1VQAIvLy2xIVG5ATZl5wDXl/tddByAWCizRbOme+TA==", + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/unpipe": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", @@ -2001,6 +2271,40 @@ "node": ">= 0.8" } }, + "node_modules/whatwg-encoding": { + "version": "3.1.1", + "resolved": 
"https://registry.npmjs.org/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", + "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", + "deprecated": "Use @exodus/bytes instead for a more spec-conformant and faster implementation", + "license": "MIT", + "dependencies": { + "iconv-lite": "0.6.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/whatwg-encoding/node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "license": "MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/whatwg-mimetype": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", + "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", diff --git a/package.json b/package.json index c4a7116..67e1a0f 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "dependencies": { "axios": "^1.16.1", "bull": "^4.16.5", + "cheerio": "^1.2.0", "dotenv": "^17.4.2", "express": "^5.2.1", "fast-xml-parser": "^4.5.6", diff --git a/src/routes/generate.js b/src/routes/generate.js index e991975..fce36d9 100644 --- a/src/routes/generate.js +++ b/src/routes/generate.js @@ -111,4 +111,23 @@ router.post('/topics-ideas', async (req, res) => { } }); +// POST /api/generate/from-url — прочитать URL и написать пост в стиле канала +router.post('/from-url', async (req, res) => { + try { + const { channelId, url } = req.body; + const userId = parseInt(req.headers['x-user-id']) || null; + if (!channelId || !url) 
return res.status(400).json({ error: 'channelId and url required' });
+
+    const channel = await channelsSvc.getChannel(userId, channelId);
+    if (!channel) return res.status(404).json({ error: 'Channel not found' });
+
+    const { generateFromUrl } = require('../services/fromUrl');
+    const result = await generateFromUrl({ url, channelId, channel });
+    res.json(result);
+  } catch (err) {
+    console.error('[Route] POST /generate/from-url', err);
+    res.status(500).json({ error: err.message });
+  }
+});
+
 module.exports = router;
diff --git a/src/routes/metrics.js b/src/routes/metrics.js
new file mode 100644
index 0000000..21177ff
--- /dev/null
+++ b/src/routes/metrics.js
@@ -0,0 +1,152 @@
+/**
+ * Metrics API routes.
+ *
+ * POST /api/metrics/collect               — force a collection run (admin/cron)
+ * GET  /api/metrics/channel/:channelId    — channel summary
+ * GET  /api/metrics/best-time/:channelId  — publication counts per weekday / hour
+ * GET  /api/metrics/user-posts/:channelId — metrics of the caller's user_posts
+ *
+ * NOTE(review): only /user-posts checks the x-user-id header. /collect,
+ * /channel and /best-time answer any caller that can reach this router —
+ * confirm that auth / channel ownership is enforced upstream.
+ */
+
+const express = require('express');
+const router = express.Router();
+const { query } = require('../config/db');
+const { collectMetrics } = require('../services/metricsCollector');
+
+const MS_PER_DAY = 86_400_000;
+// Upper bound for ?days= so that `since` is always a valid, sane Date.
+const MAX_DAYS = 365;
+// Index = ISO day of week (1 = Monday … 7 = Sunday); index 0 is unused.
+const DOW_LABELS = ['', 'Пн', 'Вт', 'Ср', 'Чт', 'Пт', 'Сб', 'Вс'];
+
+/**
+ * Parse the ?days= query value.
+ * @param {unknown} raw - raw query value
+ * @param {number} fallback - used when raw is missing, non-numeric or < 1
+ * @returns {number} integer in [1, MAX_DAYS]
+ */
+function parseDays(raw, fallback) {
+  const days = Number.parseInt(raw, 10);
+  if (!Number.isInteger(days) || days < 1) return fallback;
+  return Math.min(days, MAX_DAYS);
+}
+
+/**
+ * Parse a :channelId route parameter. Ids are treated as positive integers
+ * (the responses already echo the id as an integer).
+ * @param {string} raw
+ * @returns {number|null} the id, or null when malformed
+ */
+function parseChannelId(raw) {
+  const id = Number(raw);
+  return Number.isSafeInteger(id) && id > 0 ? id : null;
+}
+
+/** Start of the reporting window: `days` days before now. */
+function sinceDate(days) {
+  return new Date(Date.now() - days * MS_PER_DAY);
+}
+
+/** Log the failure (same "[Route]" prefix as generate.js) and answer 500. */
+function fail(res, route, err) {
+  console.error(`[Route] ${route}`, err);
+  res.status(500).json({ error: err.message });
+}
+
+// ── POST /api/metrics/collect — force a collection run ───────────────────────
+router.post('/collect', async (req, res) => {
+  try {
+    const result = await collectMetrics();
+    res.json({ ok: true, ...result });
+  } catch (err) {
+    fail(res, 'POST /metrics/collect', err);
+  }
+});
+
+// ── GET /api/metrics/channel/:channelId — channel summary ────────────────────
+router.get('/channel/:channelId', async (req, res) => {
+  const channelId = parseChannelId(req.params.channelId);
+  if (channelId === null) return res.status(400).json({ error: 'Invalid channelId' });
+
+  try {
+    const days = parseDays(req.query.days, 30);
+    const params = [channelId, sinceDate(days)];
+
+    // The four aggregates are independent of each other, so run them concurrently.
+    const [statusStats, topPosts, reactionTotals, totals] = await Promise.all([
+      // Posts per status (windowed by created_at, unlike the queries below)
+      query(`
+        SELECT status, COUNT(*) as count
+        FROM posts
+        WHERE channel_id=$1 AND created_at > $2
+        GROUP BY status
+      `, params),
+      // Top 5 posts by total reaction count
+      query(`
+        SELECT p.id, p.tg_message_id, p.published_at,
+               p.reactions, p.forwards,
+               left(p.content, 100) AS preview
+        FROM posts p
+        WHERE p.channel_id=$1
+          AND p.published_at > $2
+          AND p.reactions != '{}'
+        ORDER BY (
+          SELECT COALESCE(SUM(value::int), 0)
+          FROM jsonb_each_text(p.reactions)
+        ) DESC
+        LIMIT 5
+      `, params),
+      // Reaction totals per emoji over the window
+      query(`
+        SELECT key as emoji, SUM(value::int) as total
+        FROM posts p, jsonb_each_text(p.reactions)
+        WHERE p.channel_id=$1 AND p.published_at > $2
+        GROUP BY key
+        ORDER BY total DESC
+        LIMIT 10
+      `, params),
+      // Overall publication counters over the window
+      query(`
+        SELECT COUNT(*) as total_posts,
+               COUNT(CASE WHEN tg_message_id IS NOT NULL THEN 1 END) as published_to_tg,
+               SUM(COALESCE(forwards,0)) as total_forwards
+        FROM posts
+        WHERE channel_id=$1 AND published_at > $2
+      `, params),
+    ]);
+
+    res.json({
+      channel_id: channelId,
+      days,
+      totals: totals.rows[0],
+      by_status: statusStats.rows,
+      top_posts: topPosts.rows,
+      reaction_totals: reactionTotals.rows,
+    });
+  } catch (err) {
+    fail(res, 'GET /metrics/channel/:channelId', err);
+  }
+});
+
+// ── GET /api/metrics/best-time/:channelId — best weekday / hour ──────────────
+router.get('/best-time/:channelId', async (req, res) => {
+  const channelId = parseChannelId(req.params.channelId);
+  if (channelId === null) return res.status(400).json({ error: 'Invalid channelId' });
+
+  try {
+    const days = parseDays(req.query.days, 90);
+    const params = [channelId, sinceDate(days)];
+
+    const [byDow, byHour] = await Promise.all([
+      // Publications per ISO weekday (Moscow time)
+      query(`
+        SELECT EXTRACT(ISODOW FROM published_at AT TIME ZONE 'Europe/Moscow') AS dow,
+               COUNT(*) as count
+        FROM posts
+        WHERE channel_id=$1 AND published_at > $2 AND status='published'
+        GROUP BY dow
+        ORDER BY dow
+      `, params),
+      // Publications per hour of day (Moscow time)
+      query(`
+        SELECT EXTRACT(HOUR FROM published_at AT TIME ZONE 'Europe/Moscow') AS hour,
+               COUNT(*) as count
+        FROM posts
+        WHERE channel_id=$1 AND published_at > $2 AND status='published'
+        GROUP BY hour
+        ORDER BY hour
+      `, params),
+    ]);
+
+    res.json({
+      channel_id: channelId,
+      days,
+      // EXTRACT/COUNT may arrive as strings from the driver, so every numeric
+      // field is normalised to a number (dow was previously passed through raw).
+      by_dow: byDow.rows.map((r) => {
+        const dow = Number(r.dow);
+        return { dow, label: DOW_LABELS[dow] || '?', count: Number.parseInt(r.count, 10) };
+      }),
+      by_hour: byHour.rows.map((r) => ({
+        hour: Number.parseInt(r.hour, 10),
+        count: Number.parseInt(r.count, 10),
+      })),
+    });
+  } catch (err) {
+    fail(res, 'GET /metrics/best-time/:channelId', err);
+  }
+});
+
+// ── GET /api/metrics/user-posts/:channelId — metrics of the caller's posts ───
+router.get('/user-posts/:channelId', async (req, res) => {
+  const userId = Number.parseInt(req.headers['x-user-id'], 10);
+  if (!userId) return res.status(401).json({ error: 'Unauthorized' });
+
+  const channelId = parseChannelId(req.params.channelId);
+  if (channelId === null) return res.status(400).json({ error: 'Invalid channelId' });
+
+  try {
+    const days = parseDays(req.query.days, 30);
+
+    const { rows } = await query(`
+      SELECT id, tg_message_id, status, published_at,
+             reactions, forwards, metrics_at,
+             left(content, 120) AS preview, topic, image_url
+      FROM user_posts
+      WHERE channel_id=$1 AND user_id=$2
+        AND published_at > $3
+      ORDER BY published_at DESC
+      LIMIT 50
+    `, [channelId, userId, sinceDate(days)]);
+
+    res.json(rows);
+  } catch (err) {
+    fail(res, 'GET /metrics/user-posts/:channelId', err);
+  }
+});
+
+module.exports = router;
diff --git a/src/services/fromUrl.js b/src/services/fromUrl.js
new file mode 100644
index 0000000..639d855
--- /dev/null
+++ b/src/services/fromUrl.js
@@ -0,0 +1,195 @@
+/**
+ * fromUrl.js — parse a URL and generate a post from the page content.
+ *
+ * Supported sources:
+ * 1. Any web page — cheerio, og-meta + main text
+ * 2. YouTube — title + description (no transcript, yt-dlp is not needed)
+ * 3.
t.me public post — message text
+ */
+
+const axios = require('axios');
+const cheerio = require('cheerio');
+const ai = require('./ai');
+const pb = require('./promptBuilder');
+
+const FETCH_TIMEOUT = 12_000;
+const MAX_TEXT_LEN = 4000; // text budget for the prompt
+const MAX_HTML_BYTES = 5 * 1024 * 1024; // refuse to buffer pages larger than this
+
+// Source names interpolated into the (Russian) prompt.
+const SOURCE_LABELS = { youtube: 'YouTube-видео', telegram: 'Telegram-пост' };
+const DEFAULT_SOURCE_LABEL = 'Веб-статья';
+
+// ── Helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Download a page and return its body as a string.
+ * responseType 'text' keeps axios from JSON-parsing the body, so callers can
+ * always run string/cheerio operations on the result.
+ * @param {string} url
+ * @param {object} headers - request headers (User-Agent etc.)
+ * @returns {Promise<string>}
+ */
+async function fetchHtml(url, headers) {
+  const res = await axios.get(url, {
+    timeout: FETCH_TIMEOUT,
+    maxRedirects: 5,
+    maxContentLength: MAX_HTML_BYTES,
+    responseType: 'text',
+    headers,
+  });
+  return typeof res.data === 'string' ? res.data : String(res.data ?? '');
+}
+
+/** True when hostname is `domain` itself or one of its subdomains. */
+function isHost(hostname, domain) {
+  return hostname === domain || hostname.endsWith(`.${domain}`);
+}
+
+/**
+ * First-line SSRF guard: rejects localhost and loopback / private / link-local
+ * IP literals. It does NOT resolve DNS and does not inspect redirect targets.
+ * NOTE(review): if this endpoint is exposed to untrusted users, resolve the
+ * host and pin the address (or fetch through an egress proxy) as well.
+ * @param {string} hostname - URL#hostname (IPv6 literals come in brackets)
+ * @returns {boolean}
+ */
+function isInternalHost(hostname) {
+  if (hostname === 'localhost' || hostname.endsWith('.localhost')) return true;
+  if (hostname.startsWith('[')) {
+    // ::, ::1, unique-local fc00::/7, link-local fe80::/10
+    return /^\[(::1?\]|f[cd]|fe[89ab])/i.test(hostname);
+  }
+  const m = hostname.match(/^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/);
+  if (!m) return false;
+  const a = Number(m[1]);
+  const b = Number(m[2]);
+  return a === 0 || a === 10 || a === 127
+    || (a === 169 && b === 254)
+    || (a === 172 && b >= 16 && b <= 31)
+    || (a === 192 && b === 168);
+}
+
+// ── Parsers ──────────────────────────────────────────────────────────────────
+
+/**
+ * YouTube: title + description from the meta tags, plus chapter titles when
+ * they can be extracted from the inline page data.
+ * @param {string} url
+ * @returns {Promise<{title: string, text: string, imageUrl: string|null, source: 'youtube'}>}
+ */
+async function parseYoutube(url) {
+  const html = await fetchHtml(url, { 'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1)' });
+  const $ = cheerio.load(html);
+
+  const title = $('meta[name="title"]').attr('content')
+    || $('meta[property="og:title"]').attr('content')
+    || $('title').text();
+
+  const description = $('meta[name="description"]').attr('content')
+    || $('meta[property="og:description"]').attr('content')
+    || '';
+
+  // Best effort: pull chapter titles out of the inline page data.
+  let chapters = '';
+  const dataMatch = html.match(/"chapters":\s*\[([^\]]{1,3000})\]/);
+  if (dataMatch) {
+    try {
+      const arr = JSON.parse(`[${dataMatch[1]}]`);
+      chapters = arr.map((c) => c.title?.simpleText || '').filter(Boolean).join(', ');
+    } catch {
+      // The captured fragment is frequently not valid JSON on its own;
+      // chapters are optional, so the post is generated without them.
+    }
+  }
+
+  const imageUrl = $('meta[property="og:image"]').attr('content') || null;
+
+  const text = [title, description, chapters ? `Главы: ${chapters}` : '']
+    .filter(Boolean).join('\n\n').slice(0, MAX_TEXT_LEN);
+
+  return { title, text, imageUrl, source: 'youtube' };
+}
+
+/**
+ * Public t.me post, fetched through the embed widget.
+ * @param {string} url - e.g. https://t.me/channel/123
+ * @returns {Promise<{title: string, text: string, imageUrl: string|null, source: 'telegram'}>}
+ */
+async function parseTelegram(url) {
+  // Build the embed URL with the URL API so links that already carry a query
+  // string or a #fragment still yield a valid address.
+  const embedUrl = new URL(url);
+  embedUrl.protocol = 'https:';
+  embedUrl.hash = '';
+  embedUrl.searchParams.set('embed', '1');
+  embedUrl.searchParams.set('mode', 'tme');
+
+  const html = await fetchHtml(embedUrl.href, { 'User-Agent': 'Mozilla/5.0' });
+  const $ = cheerio.load(html);
+
+  const text = $('.tgme_widget_message_text').text().trim()
+    || $('meta[property="og:description"]').attr('content')
+    || '';
+
+  const title = $('meta[property="og:title"]').attr('content') || 'Telegram пост';
+  const imageUrl = $('meta[property="og:image"]').attr('content') || null;
+
+  return { title, text: text.slice(0, MAX_TEXT_LEN), imageUrl, source: 'telegram' };
+}
+
+/**
+ * Generic web page: og-meta plus the main text blocks.
+ * @param {string} url
+ * @returns {Promise<{title: string, text: string, imageUrl: string|null, source: 'web'}>}
+ */
+async function parseWeb(url) {
+  const html = await fetchHtml(url, {
+    'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+    'Accept-Language': 'ru-RU,ru;q=0.9,en;q=0.8',
+  });
+  const $ = cheerio.load(html);
+
+  // Drop page chrome before reading any text.
+  $('script, style, nav, footer, header, aside, form, .cookie, .banner, .popup, .ad').remove();
+
+  const title = $('meta[property="og:title"]').attr('content')
+    || $('meta[name="title"]').attr('content')
+    || $('h1').first().text().trim()
+    || $('title').text().trim();
+
+  const description = $('meta[property="og:description"]').attr('content')
+    || $('meta[name="description"]').attr('content')
+    || '';
+
+  const imageUrl = $('meta[property="og:image"]').attr('content') || null;
+
+  // Main text: the first article-like container, otherwise the whole body.
+  const container = $('article, main, .content, .post, .entry, [role="main"]').first();
+  const root = container.length ? container : $('body');
+
+  const paragraphs = [];
+  root.find('p, h2, h3, li').each((_, el) => {
+    const t = $(el).text().trim();
+    if (t.length > 40) paragraphs.push(t); // skip menu items, captions, etc.
+  });
+
+  const bodyText = paragraphs.join('\n').slice(0, MAX_TEXT_LEN);
+  const text = [description, bodyText].filter(Boolean).join('\n\n').slice(0, MAX_TEXT_LEN);
+
+  return { title, text, imageUrl, source: 'web' };
+}
+
+// ── Source router ────────────────────────────────────────────────────────────
+
+/**
+ * Validate the URL and dispatch to the parser for its host.
+ * @param {string} url - user-supplied URL
+ * @returns {Promise<{title: string, text: string, imageUrl: string|null, source: string}>}
+ * @throws {Error} wrapped error (original in `cause`) for an invalid URL, a
+ *   non-http(s) scheme, an internal address, or a failed download
+ */
+async function parseUrl(url) {
+  try {
+    const u = new URL(url);
+    if (u.protocol !== 'http:' && u.protocol !== 'https:') {
+      throw new Error(`unsupported protocol ${u.protocol}`);
+    }
+    if (isInternalHost(u.hostname)) {
+      throw new Error('internal addresses are not allowed');
+    }
+    // Match exact hosts / subdomains: a substring test would also accept
+    // look-alikes such as youtube.com.example.net.
+    if (isHost(u.hostname, 'youtube.com') || isHost(u.hostname, 'youtu.be')) {
+      return await parseYoutube(u.href);
+    }
+    if (u.hostname === 't.me') {
+      return await parseTelegram(u.href);
+    }
+    return await parseWeb(u.href);
+  } catch (err) {
+    throw new Error(`Не удалось загрузить страницу: ${err.message}`, { cause: err });
+  }
+}
+
+// ── Post generation from the parsed content ──────────────────────────────────
+
+/**
+ * Fetch `url`, extract its content and ask the model for a post in the
+ * channel's style.
+ * @param {object} args
+ * @param {string} args.url - page to read
+ * @param {*} args.channelId - accepted for the caller's convenience; not read here
+ * @param {object} args.channel - channel record passed to the prompt builder
+ * @returns {Promise<{content: string, title: string, imageUrl: string|null, source: string, usage: *}>}
+ * @throws {Error} when arguments are missing, the page cannot be loaded, or
+ *   neither a title nor any text could be extracted
+ */
+async function generateFromUrl({ url, channelId, channel }) {
+  if (!url) throw new Error('url required');
+  if (!channel) throw new Error('channel required');
+
+  // 1. Parse the page
+  const parsed = await parseUrl(url);
+
+  if (!parsed.text && !parsed.title) {
+    throw new Error('Не удалось извлечь текст со страницы');
+  }
+
+  // 2. Build the prompt
+  const channelContext = pb.buildPostSystemPrompt(channel, '');
+  const sourceLabel = SOURCE_LABELS[parsed.source] || DEFAULT_SOURCE_LABEL;
+
+  const userPrompt = `На основе материала ниже напиши пост для Telegram-канала в стиле этого канала.
+
+ИСТОЧНИК: ${sourceLabel}
+URL: ${url}
+
+ЗАГОЛОВОК:
+${parsed.title || '—'}
+
+СОДЕРЖАНИЕ:
+${parsed.text || '(текст не извлечён, опирайся на заголовок)'}
+
+---
+
+ЗАДАЧА:
+— Напиши пост в стиле и голосе канала
+— Передай суть материала своими словами, не пересказывай дословно
+— Добавь свой угол зрения или вывод
+— Длина поста: 150–500 символов
+— Верни ТОЛЬКО текст поста, без пояснений`;
+
+  // 3. Generate
+  const result = await ai.chat(
+    require('../config').ai.models.post,
+    channelContext,
+    userPrompt,
+    { maxTokens: 1000, temperature: 0.85 }
+  );
+
+  return {
+    content: result.text,
+    title: parsed.title,
+    imageUrl: parsed.imageUrl,
+    source: parsed.source,
+    usage: result.usage,
+  };
+}
+
+module.exports = { generateFromUrl, parseUrl };
diff --git a/src/services/metricsCollector.js b/src/services/metricsCollector.js
new file mode 100644
index 0000000..fae8762
--- /dev/null
+++ b/src/services/metricsCollector.js
@@ -0,0 +1,162 @@
+/**
+ * metricsCollector.js
+ * Worker that collects metrics for published posts.
+ *
+ * What it covers:
+ *   - getMessageReactionCount (Bot API 7+) — reactions on a post
+ *   - forwards — getForwardCount (Bot API 8+, if available)
+ *   - views — not available through the Bot API; stays 0 until MTProto
+ *
+ * NOTE(review): I could not find getMessageReactionCount / getForwardCount in
+ * the published Bot API method list (reaction counts are documented as the
+ * `message_reaction_count` update). Verify against the API actually served at
+ * TELEGRAM_API_BASE before relying on this collector.
+ *
+ * Runs every 15 minutes via setInterval (started from index.js)
+ * or on demand: POST /api/metrics/collect
+ */
+
+const axios = require('axios');
+const { query } = require('../config/db');
+
+const COLLECT_WINDOW_DAYS = 30; // collect metrics for posts published in the last N days
+const DEFAULT_TG_API_BASE = 'https://api.telegram.org';
+
+/**
+ * Resolve the Telegram API base URL: the TELEGRAM_API_BASE row of
+ * app_settings without trailing slashes, or the public endpoint when the row
+ * is missing/empty or the lookup fails.
+ * @returns {Promise<string>}
+ */
+async function getTgApiBase() {
+  try {
+    const { rows } = await query(`SELECT value FROM app_settings WHERE key='TELEGRAM_API_BASE'`);
+    return rows[0]?.value?.replace(/\/+$/, '') || DEFAULT_TG_API_BASE;
+  } catch (e) {
+    // Best effort: a settings lookup failure must not stop collection.
+    console.warn('[Metrics] TELEGRAM_API_BASE lookup failed, using default:', e.message);
+    return DEFAULT_TG_API_BASE;
+  }
+}
+
+/**
+ * Collect reactions for a single post.
+ * Returns { reactions: { emoji: count, ... }, forwards: 0, views: 0 }, or null
+ * when the metrics could not be fetched (missing token/ids, network error,
+ * non-400 API error) so that the caller keeps the previously stored snapshot
+ * instead of overwriting it with empty data.
+ *
+ * NOTE(review): I could not find getMessageReactionCount among the published
+ * Bot API methods — confirm the endpoint exists on the configured API base.
+ *
+ * @param {object} args
+ * @param {string} args.botToken
+ * @param {string|number} args.tgChannelId - sent as chat_id
+ * @param {string|number} args.tgMessageId - sent as message_id
+ * @param {string} args.tgApiBase - API base URL without a trailing slash
+ * @returns {Promise<{reactions: Object<string, number>, forwards: number, views: number}|null>}
+ */
+async function collectForPost({ botToken, tgChannelId, tgMessageId, tgApiBase }) {
+  if (!botToken || !tgChannelId || !tgMessageId) return null;
+
+  const metrics = { reactions: {}, forwards: 0, views: 0 };
+  try {
+    const res = await axios.get(`${tgApiBase}/bot${botToken}/getMessageReactionCount`, {
+      params: { chat_id: tgChannelId, message_id: tgMessageId },
+      timeout: REQUEST_TIMEOUT_MS,
+    });
+    if (!res.data?.ok) return null;
+
+    const list = res.data.result?.reactions;
+    if (Array.isArray(list)) {
+      for (const r of list) {
+        const emoji = r.type?.emoji || r.type?.custom_emoji_id || '?';
+        metrics.reactions[emoji] = (metrics.reactions[emoji] || 0) + (r.count || 0);
+      }
+    }
+    return metrics;
+  } catch (e) {
+    // 400 = reactions are disabled or the post was not found — not an error,
+    // the post simply has no reactions to report.
+    if (e.response?.status === 400) return metrics;
+    console.warn('[Metrics] getMessageReactionCount error:', e.message);
+    return null;
+  }
+}
+
+const REQUEST_TIMEOUT_MS = 8000;
+const MS_PER_DAY = 86_400_000;
+const COLLECT_INTERVAL_MS = 15 * 60 * 1000;
+const FIRST_RUN_DELAY_MS = 30_000;
+
+/** Store the latest snapshot on `posts` and append a history row when non-empty. */
+async function savePostMetrics(post, metrics) {
+  const reactionsJson = JSON.stringify(metrics.reactions);
+
+  await query(`
+    UPDATE posts
+    SET reactions=$1, forwards=$2, metrics_at=NOW()
+    WHERE id=$3
+  `, [reactionsJson, metrics.forwards, post.id]);
+
+  // History rows are written only when there is something to record.
+  const totalReactions = Object.values(metrics.reactions).reduce((sum, n) => sum + n, 0);
+  if (totalReactions > 0 || metrics.forwards > 0) {
+    await query(`
+      INSERT INTO post_metrics (post_id, captured_at, views, forwards, reactions)
+      VALUES ($1, NOW(), $2, $3, $4)
+    `, [post.id, metrics.views, metrics.forwards, reactionsJson]);
+  }
+}
+
+/** Store the latest snapshot on `user_posts` (no history table for these). */
+async function saveUserPostMetrics(post, metrics) {
+  await query(`
+    UPDATE user_posts
+    SET reactions=$1, forwards=$2, metrics_at=NOW()
+    WHERE id=$3
+  `, [JSON.stringify(metrics.reactions), metrics.forwards, post.id]);
+}
+
+/**
+ * Fetch and store metrics for each row, one request at a time.
+ * A failure on one row is logged and does not stop the batch.
+ * @returns {Promise<number>} number of rows whose metrics were stored
+ */
+async function collectBatch(rows, label, tgApiBase, save) {
+  let updated = 0;
+  for (const row of rows) {
+    try {
+      const metrics = await collectForPost({
+        botToken: row.bot_token,
+        tgChannelId: row.tg_channel_id,
+        tgMessageId: row.tg_message_id,
+        tgApiBase,
+      });
+      if (!metrics) continue; // fetch failed — keep what is already stored
+
+      await save(row, metrics);
+      updated++;
+    } catch (e) {
+      console.error(`[Metrics] ${label}`, row.id, 'error:', e.message);
+    }
+  }
+  return updated;
+}
+
+/** One full collection pass over posts and user_posts. */
+async function runCollect() {
+  const tgApiBase = await getTgApiBase();
+  const since = new Date(Date.now() - COLLECT_WINDOW_DAYS * MS_PER_DAY);
+
+  // posts (system channels) with a Telegram message id, newest first
+  const { rows: posts } = await query(`
+    SELECT p.id, p.tg_message_id, p.channel_id, p.published_at,
+           c.bot_token, c.tg_channel_id
+    FROM posts p
+    JOIN channels c ON c.id = p.channel_id
+    WHERE p.tg_message_id IS NOT NULL
+      AND p.published_at > $1
+      AND c.platform = 'telegram'
+      AND c.bot_token IS NOT NULL
+    ORDER BY p.published_at DESC
+    LIMIT 100
+  `, [since]);
+  let updated = await collectBatch(posts, 'post', tgApiBase, savePostMetrics);
+
+  // user_posts — user-authored posts with a Telegram message id
+  const { rows: userPosts } = await query(`
+    SELECT up.id, up.tg_message_id, up.channel_id,
+           c.bot_token, c.tg_channel_id
+    FROM user_posts up
+    JOIN channels c ON c.id = up.channel_id
+    WHERE up.tg_message_id IS NOT NULL
+      AND up.published_at > $1
+      AND c.platform = 'telegram'
+      AND c.bot_token IS NOT NULL
+    ORDER BY up.published_at DESC
+    LIMIT 50
+  `, [since]);
+  updated += await collectBatch(userPosts, 'user_post', tgApiBase, saveUserPostMetrics);
+
+  console.log(`[Metrics] Collected for ${updated} posts`);
+  return { updated };
+}
+
+// Promise of the pass currently running, or null when idle.
+let inFlight = null;
+
+/**
+ * Main entry point: collect metrics for posts and user_posts published within
+ * the last COLLECT_WINDOW_DAYS days.
+ *
+ * A pass can take long (up to 150 sequential requests), and it can be started
+ * both by the timer and by POST /api/metrics/collect. Concurrent callers share
+ * the pass that is already running instead of starting a second one.
+ * @returns {Promise<{updated: number}>}
+ */
+function collectMetrics() {
+  if (!inFlight) {
+    inFlight = runCollect().finally(() => {
+      inFlight = null;
+    });
+  }
+  return inFlight;
+}
+
+// Auto-run every 15 minutes
+let timer = null;
+
+/**
+ * Start periodic collection; calling it again is a no-op.
+ * The timers are unref'd so they never keep the process alive on their own.
+ */
+function startAutoCollect() {
+  if (timer) return;
+  timer = setInterval(() => {
+    collectMetrics().catch((e) => console.error('[Metrics] auto-collect error:', e.message));
+  }, COLLECT_INTERVAL_MS);
+  timer.unref();
+  // First run 30 seconds after startup
+  setTimeout(() => {
+    collectMetrics().catch((e) => console.error('[Metrics] init error:', e.message));
+  }, FIRST_RUN_DELAY_MS).unref();
+  console.log('[Metrics] Auto-collect started (every 15 min)');
+}
+
+module.exports = { collectMetrics, startAutoCollect };