fix: duplicate article prevention — source_topic deduplication

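autogen.js: getNextTopic() теперь проверяет source_topic (точное совпадение без учёта
  регистра и крайних пробелов) вместо сравнения первых 20 символов заголовка (который
  AI переименовывает); для старых статей без source_topic оставлен fallback по первым
  30 символам темы в заголовке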
articles.js: INSERT сохраняет source_topic из параметра topic
DB: articles.source_topic TEXT, articles.topic_hash VARCHAR(64)
Пометили существующие дубли: article 61 → archived, source_topic заполнен
This commit is contained in:
Ник (Claude)
2026-06-12 11:49:33 +03:00
parent bbae6c8832
commit 7a70f79e61
2 changed files with 25 additions and 7 deletions
+3 -2
View File
@@ -143,8 +143,8 @@ async function generateAndSaveArticle({ topic, keywords = [], tags = [], autoPub
)); ));
const { rows: artRows } = await query( const { rows: artRows } = await query(
`INSERT INTO articles (slug, title, excerpt, content, tags, category, reading_time, status, job_id, seo_title, seo_descr) `INSERT INTO articles (slug, title, excerpt, content, tags, category, reading_time, status, job_id, seo_title, seo_descr, source_topic)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11) RETURNING *`, VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12) RETURNING *`,
[ [
slug, title, excerpt, content, slug, title, excerpt, content,
JSON.stringify(cleanTags), JSON.stringify(cleanTags),
@@ -154,6 +154,7 @@ async function generateAndSaveArticle({ topic, keywords = [], tags = [], autoPub
jobId, jobId,
title.substring(0, 60), title.substring(0, 60),
excerpt.substring(0, 160), excerpt.substring(0, 160),
topic || null,
] ]
); );
+22 -5
View File
@@ -69,14 +69,31 @@ async function getNextTopic(category) {
if (rows.length) { if (rows.length) {
return { id: rows[0].id, topic: rows[0].topic, tags: rows[0].tags || [], keywords: rows[0].keywords || [] }; return { id: rows[0].id, topic: rows[0].topic, tags: rows[0].tags || [], keywords: rows[0].keywords || [] };
} }
// Из банка — случайная тема которой ещё не было // Из банка — темы которые ещё не использовались
const bank = TOPIC_BANK[category] || TOPIC_BANK['ai-tools']; const bank = TOPIC_BANK[category] || TOPIC_BANK['ai-tools'];
const { rows: used } = await query(
`SELECT a.title FROM articles a WHERE a.category=$1 AND a.status='published'`, // Получаем уже использованные темы по source_topic (точное совпадение)
const { rows: usedTopics } = await query(
`SELECT source_topic FROM articles WHERE category=$1 AND source_topic IS NOT NULL`,
[category] [category]
); );
const usedTitles = used.map(r => r.title.toLowerCase()); const usedSet = new Set(usedTopics.map(r => r.source_topic.toLowerCase().trim()));
const unused = bank.filter(t => !usedTitles.some(u => u.includes(t.slice(0, 20).toLowerCase())));
// Также проверяем по заголовкам (fallback для старых статей без source_topic)
const { rows: usedTitles } = await query(
`SELECT title FROM articles WHERE category=$1 AND source_topic IS NULL AND status='published'`,
[category]
);
const titlesLower = usedTitles.map(r => r.title.toLowerCase());
const unused = bank.filter(t => {
const tLow = t.toLowerCase().trim();
if (usedSet.has(tLow)) return false;
// Fallback: проверяем по первым 30 символам заголовка
if (titlesLower.some(title => title.includes(tLow.slice(0, 30)))) return false;
return true;
});
const pool = unused.length > 0 ? unused : bank; const pool = unused.length > 0 ? unused : bank;
const topic = pool[Math.floor(Math.random() * pool.length)]; const topic = pool[Math.floor(Math.random() * pool.length)];
return { id: null, topic, tags: [], keywords: [] }; return { id: null, topic, tags: [], keywords: [] };