a370b8f7d8
- Персонаж Зеро: 23 позы (zeroCharacter.js), скрипты генерации - Auto-publish статей в TG: multipart upload, кнопки, режим alternating Zero/cover - Fallback цепочка обложек: aiprimetech gpt-5.5 → Pollinations → local SVG (6 палитр) - Auto-series: Claude haiku определяет серию для каждой статьи автоматически - Channel stats: подписчики, история, delta 24h/7d - Photo-search: Yandex API, профили доменов, Redis лимиты - Scheduled posts runner: backfill, preview, queue, cancel - promptBuilder: author_persona Зеро, голос от первого лица - Fixes: dollar-placeholder bugs в PATCH channels/autogen, listArticles фильтры - AI model: gpt-5.5 для image generation
263 lines
9.1 KiB
JavaScript
263 lines
9.1 KiB
JavaScript
// Поиск фото через Yandex Search API (Image search v2)
|
|
//
|
|
// Архитектура:
|
|
// - Запрос → searchapi.api.cloud.yandex.net/v2/image/search
|
|
// - Ответ: JSON { rawData: base64 }, внутри base64 — XML с результатами
|
|
// - Парсим XML, нормализуем в массив объектов
|
|
// - Фильтруем по whitelist доменов из photo_search_profiles
|
|
// - Фильтруем по min-size (отсев иконок)
|
|
// - Считаем суточный лимит в Redis (ключ photo_search:count:YYYY-MM-DD)
|
|
//
|
|
// Если меняем провайдера (yandex → serpapi) — этот модуль будет адаптером,
|
|
// логика квот и фильтрации профилей остаётся.
|
|
|
|
const axios = require('axios');
|
|
const { XMLParser } = require('fast-xml-parser');
|
|
const Redis = require('ioredis');
|
|
const settings = require('./settings');
|
|
const config = require('../config');
|
|
const { query: dbQuery } = require('../config/db');
|
|
|
|
// Smallest allowed length, in pixels, of an image's shorter side; results whose
// known dimensions fall below this are dropped as icons.
const MIN_DIMENSION_PX = 400;

// Value sent in the `userAgent` field of the search request body.
const USER_AGENT = 'Mozilla/5.0 (compatible; ZeroPost/1.0)';

// URL that image-search requests are POSTed to.
const YANDEX_ENDPOINT = 'https://searchapi.api.cloud.yandex.net/v2/image/search';
|
|
|
|
// Module-wide Redis client; stays null until getRedis() is called for the first time.
let _redis = null;

/**
 * Returns the shared Redis client, creating it on the first call.
 *
 * The client is built from `config.redis.host` / `config.redis.port` and gets an
 * 'error' listener that only logs the error message.
 *
 * @returns {object} The single Redis client instance used by this module.
 */
function getRedis() {
  if (_redis) return _redis;

  const { host, port } = config.redis;
  _redis = new Redis({
    host,
    port,
    lazyConnect: false,
    maxRetriesPerRequest: 3,
  });

  const logRedisError = (err) => console.error('[photo-search] redis error:', err.message);
  _redis.on('error', logRedisError);

  return _redis;
}
|
|
|
|
// ── Квоты (Redis daily counter) ──────────────────────────────────────────────
|
|
|
|
/**
 * Builds the Redis key of the per-day search counter.
 *
 * The day is the first ten characters of the current time's `toISOString()`
 * output, i.e. the UTC calendar date.
 *
 * @returns {string} Key of the form `photo_search:count:YYYY-MM-DD`.
 */
function dailyKey() {
  const isoTimestamp = new Date().toISOString();
  const utcDay = isoTimestamp.slice(0, 10);
  return `photo_search:count:${utcDay}`;
}
|
|
|
|
/**
 * Reads today's search counter from Redis.
 *
 * Best-effort: a missing or non-numeric value counts as 0, and any failure while
 * reading also yields 0 so that callers are never blocked by a Redis problem.
 * Failures are logged rather than silently discarded.
 *
 * @returns {Promise<number>} Number of searches recorded under today's key, or 0.
 */
async function getDailyCount() {
  try {
    const raw = await getRedis().get(dailyKey());
    // Explicit radix: the stored counter is always read as a decimal integer.
    const count = Number.parseInt(raw, 10);
    return Number.isNaN(count) ? 0 : count;
  } catch (err) {
    console.error('[photo-search] count read failed:', err?.message ?? err);
    return 0;
  }
}
|
|
|
|
/**
 * Increments today's search counter in Redis and returns the new value.
 *
 * When the increment yields 1 (the key was just created) the key is given a
 * 48-hour expiry. Any failure is logged and reported as 0 instead of being thrown.
 *
 * @returns {Promise<number>} Counter value after the increment, or 0 on failure.
 */
async function incrementDaily() {
  const TTL_SECONDS = 172800; // 48h

  try {
    const client = getRedis();
    const key = dailyKey();
    const value = await client.incr(key);

    const keyJustCreated = value === 1;
    if (keyJustCreated) {
      await client.expire(key, TTL_SECONDS);
    }

    return value;
  } catch (error) {
    console.error('[photo-search] incr failed:', error.message);
    return 0;
  }
}
|
|
|
|
/**
 * Reports the daily search quota: how many searches were used and how many remain.
 *
 * The limit comes from the `YANDEX_SEARCH_DAILY_LIMIT` setting (default 300). If
 * the stored value is not a number, the default is used instead, so that `limit`
 * and `remaining` are never NaN.
 *
 * @returns {Promise<{used: number, limit: number, remaining: number}>}
 *   `remaining` is clamped at 0.
 */
async function getQuotaStatus() {
  const DEFAULT_DAILY_LIMIT = 300;

  const rawLimit = await settings.get('YANDEX_SEARCH_DAILY_LIMIT', String(DEFAULT_DAILY_LIMIT));
  const parsedLimit = Number.parseInt(rawLimit, 10);
  // A configured limit of 0 is valid and must be kept; only NaN falls back.
  const limit = Number.isNaN(parsedLimit) ? DEFAULT_DAILY_LIMIT : parsedLimit;

  const used = await getDailyCount();
  return { used, limit, remaining: Math.max(0, limit - used) };
}
|
|
|
|
// ── Парсинг XML ответа Yandex ────────────────────────────────────────────────
|
|
|
|
// Shared XML parser used by parseYandexXml().
// Attributes are kept (ignoreAttributes: false) under an "@_" name prefix and are
// not type-converted; element text is exposed as "#text"; values are trimmed.
const xmlParser = new XMLParser({
  trimValues: true,
  textNodeName: '#text',
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  parseAttributeValue: false,
});
|
|
|
|
/** Normalises a parsed XML node that may be absent, single, or repeated into an array. */
function toNodeList(node) {
  return Array.isArray(node) ? node : [node].filter(Boolean);
}

/** Extracts the text of a parsed node that is either a plain value or an object with "#text". */
function nodeText(node) {
  // `typeof null` is 'object', so null must be excluded before reading "#text".
  if (node !== null && typeof node === 'object') return node['#text'] || '';
  return node || '';
}

/** Parses a decimal integer, returning 0 for missing or non-numeric input. */
function toInt(value) {
  return Number.parseInt(value, 10) || 0;
}

/**
 * Decodes and parses the base64-encoded XML payload of a Yandex image-search response.
 *
 * @param {string} base64Data - Base64 text of the XML document.
 * @returns {{total: number, docs: Array<object>}} `docs` holds one normalised record
 *   per `<doc>` (imageUrl, thumbUrl, sourceUrl, sourceDomain, title, passage, width,
 *   height, thumbWidth, thumbHeight). `total` is the count from the `<found>` node
 *   whose `priority` attribute is "all"; when that node is absent or carries no
 *   numeric text, `total` falls back to `docs.length`.
 * @throws {Error} If the document has no `yandexsearch.response` node, or if the
 *   response contains an `<error>` node.
 */
function parseYandexXml(base64Data) {
  const xmlText = Buffer.from(base64Data, 'base64').toString('utf-8');
  const response = xmlParser.parse(xmlText)?.yandexsearch?.response;

  if (!response) {
    throw new Error('Unexpected Yandex response: no <response>');
  }
  if (response.error) {
    const { error } = response;
    const errText = typeof error === 'object' ? error['#text'] || JSON.stringify(error) : error;
    throw new Error(`Yandex error: ${errText}`);
  }

  const grouping = response.results?.grouping;
  if (!grouping) return { total: 0, docs: [] };

  const docs = [];
  for (const group of toNodeList(grouping.group)) {
    for (const doc of toNodeList(group.doc)) {
      const imgProps = doc['image-properties'] || {};
      docs.push({
        imageUrl: imgProps['image-link'] || doc.url || null,
        thumbUrl: imgProps['thumbnail-link'] || null,
        sourceUrl: imgProps['html-link'] || null,
        sourceDomain: doc.domain || null,
        title: String(nodeText(doc.title)).slice(0, 200),
        passage: String(nodeText(doc.passage)).slice(0, 200),
        width: toInt(imgProps['original-width']),
        height: toInt(imgProps['original-height']),
        thumbWidth: toInt(imgProps['thumbnail-width']),
        thumbHeight: toInt(imgProps['thumbnail-height']),
      });
    }
  }

  const foundAll = toNodeList(response.found).find((f) => f?.['@_priority'] === 'all');
  const reportedTotal = foundAll ? Number.parseInt(foundAll['#text'], 10) : Number.NaN;
  // Never hand NaN to callers: fall back to the number of documents actually parsed.
  const total = Number.isNaN(reportedTotal) ? docs.length : reportedTotal;

  return { total, docs };
}
|
|
|
|
// ── Фильтрация результатов ───────────────────────────────────────────────────
|
|
|
|
/**
 * Checks a domain against a whitelist, case-insensitively.
 *
 * A domain matches an entry when it equals the entry or is a subdomain of it
 * (ends with "." + entry). A missing domain, or a missing/empty whitelist,
 * counts as a match.
 *
 * @param {string|null|undefined} domain - Domain to test.
 * @param {string[]|null|undefined} whitelist - Allowed base domains.
 * @returns {boolean} True when the domain is allowed.
 */
function matchesDomain(domain, whitelist) {
  const hasWhitelist = Boolean(whitelist) && whitelist.length !== 0;
  if (!domain || !hasWhitelist) return true;

  const candidate = domain.toLowerCase();
  const covers = (entry) => {
    const base = entry.toLowerCase();
    return candidate === base || candidate.endsWith(`.${base}`);
  };

  return whitelist.some(covers);
}
|
|
|
|
/**
 * Tells whether an image is large enough to keep.
 *
 * Images with an unknown size (width or height missing or 0) are let through;
 * otherwise the shorter side must be at least MIN_DIMENSION_PX.
 *
 * @param {{width: number, height: number}} doc - Normalised search result.
 * @returns {boolean} True when the image passes the size filter.
 */
function meetsMinSize(doc) {
  const { width, height } = doc;

  const sizeKnown = Boolean(width) && Boolean(height);
  if (!sizeKnown) return true; // unknown size: let it through

  const shorterSide = Math.min(width, height);
  return shorterSide >= MIN_DIMENSION_PX;
}
|
|
|
|
// ── Profile lookup ───────────────────────────────────────────────────────────
|
|
|
|
/**
 * Loads the domain list of a photo-search profile.
 *
 * @param {string} slug - Profile slug; a falsy slug skips the database lookup.
 * @returns {Promise<Array>} The `domains` value of the first matching row, or an
 *   empty array when there is no slug, no row, or the value is empty.
 */
async function getProfileDomains(slug) {
  if (!slug) return [];

  const sql = 'SELECT domains FROM photo_search_profiles WHERE slug=$1';
  const result = await dbQuery(sql, [slug]);
  const profile = result.rows[0];

  return profile?.domains || [];
}
|
|
|
|
// ── Main: searchByQuery ──────────────────────────────────────────────────────
|
|
|
|
/**
 * Runs one image search through the Yandex Search API and returns filtered results.
 *
 * Steps: validate the query, enforce the daily quota, load credentials and the
 * profile's domain whitelist, count the request against the quota, call the API,
 * parse the XML payload, then filter by minimum size and whitelist, de-duplicate
 * by image URL and cut to `num` items. The request is counted before it is sent,
 * so a failed request still consumes quota.
 *
 * @param {object} params
 * @param {string} params.query - Search text; must contain non-whitespace characters.
 * @param {string} [params.profileSlug='general'] - Profile whose domain whitelist is applied.
 * @param {number} [params.num=6] - Maximum number of items to return.
 * @returns {Promise<object>} `{ items, total, raw_count, filtered_count, elapsed_ms,
 *   quota: { used, limit, remaining }, profile, domains }`.
 * @throws {Error} 'query is required' for a missing, non-string or blank query.
 * @throws {Error} With `code === 'DAILY_LIMIT_EXCEEDED'` when the quota is used up.
 * @throws {Error} When credentials are not configured, the HTTP request fails
 *   (`status` holds the HTTP status, `cause` the original error), or the response
 *   carries no `rawData`.
 */
async function searchByQuery({ query, profileSlug = 'general', num = 6 }) {
  // Reject blank queries up front so they never consume daily quota.
  const queryText = typeof query === 'string' ? query.trim() : '';
  if (!queryText) {
    throw new Error('query is required');
  }

  // Quota. A non-numeric setting must not become NaN: `used >= NaN` is always
  // false and would silently disable the limit.
  const DEFAULT_DAILY_LIMIT = 300;
  const rawLimit = await settings.get('YANDEX_SEARCH_DAILY_LIMIT', String(DEFAULT_DAILY_LIMIT));
  const parsedLimit = Number.parseInt(rawLimit, 10);
  const limit = Number.isNaN(parsedLimit) ? DEFAULT_DAILY_LIMIT : parsedLimit;
  const used = await getDailyCount();
  if (used >= limit) {
    const err = new Error(`Daily photo search limit reached: ${used}/${limit}`);
    err.code = 'DAILY_LIMIT_EXCEEDED';
    throw err;
  }

  // Credentials
  const [apiKey, folderId] = await Promise.all([
    settings.get('YANDEX_SEARCH_API_KEY', ''),
    settings.get('YANDEX_SEARCH_FOLDER_ID', ''),
  ]);
  if (!apiKey || !folderId) {
    throw new Error('Yandex Search API not configured (YANDEX_SEARCH_API_KEY / YANDEX_SEARCH_FOLDER_ID)');
  }

  // Profile
  const domains = await getProfileDomains(profileSlug);

  // Ask for more than needed (4x, clamped to 10..50) because filtering drops some.
  const docsOnPage = Math.min(Math.max(num * 4, 10), 50);

  const requestBody = {
    query: {
      searchType: 'SEARCH_TYPE_RU',
      queryText,
      familyMode: 'FAMILY_MODE_MODERATE',
      page: '0',
      fixTypoMode: 'FIX_TYPO_MODE_ON',
    },
    imageSpec: {
      format: 'IMAGE_FORMAT_UNSPECIFIED',
      size: 'IMAGE_SIZE_LARGE',
      orientation: 'IMAGE_ORIENTATION_UNSPECIFIED',
      color: 'IMAGE_COLOR_UNSPECIFIED',
    },
    docsOnPage: String(docsOnPage),
    folderId,
    userAgent: USER_AGENT,
  };

  // Prefer the counter value returned by the increment over the earlier read,
  // which may be stale; a non-positive result means no usable value came back.
  const countedTotal = await incrementDaily();
  const usedNow = countedTotal > 0 ? countedTotal : used + 1;
  const startMs = Date.now();

  let response;
  try {
    response = await axios.post(YANDEX_ENDPOINT, requestBody, {
      headers: {
        'Authorization': `Api-Key ${apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 20000,
    });
  } catch (err) {
    const status = err.response?.status;
    const data = err.response?.data;
    const detail = data?.message || data?.code || err.message;
    const e = new Error(`Yandex Search API request failed (${status || 'no-response'}): ${detail}`);
    e.status = status;
    e.cause = err; // keep the original error available for debugging
    throw e;
  }

  const elapsedMs = Date.now() - startMs;

  if (!response.data?.rawData) {
    throw new Error('Yandex response missing rawData field');
  }

  const { total, docs } = parseYandexXml(response.data.rawData);

  // Filtering
  let filtered = docs.filter(meetsMinSize);
  if (domains.length > 0) {
    filtered = filtered.filter((d) => matchesDomain(d.sourceDomain, domains));
  }

  // De-duplicate by imageUrl; entries without an image URL are dropped.
  const seen = new Set();
  const dedup = [];
  for (const d of filtered) {
    if (!d.imageUrl || seen.has(d.imageUrl)) continue;
    seen.add(d.imageUrl);
    dedup.push(d);
  }

  return {
    items: dedup.slice(0, num),
    total,
    raw_count: docs.length,
    filtered_count: filtered.length,
    elapsed_ms: elapsedMs,
    quota: { used: usedNow, limit, remaining: Math.max(0, limit - usedNow) },
    profile: profileSlug,
    domains,
  };
}
|
|
|
|
module.exports = { searchByQuery, getQuotaStatus, parseYandexXml };
|