// Поиск фото через Yandex Search API (Image search v2) // // Архитектура: // - Запрос → searchapi.api.cloud.yandex.net/v2/image/search // - Ответ: JSON { rawData: base64 }, внутри base64 — XML с результатами // - Парсим XML, нормализуем в массив объектов // - Фильтруем по whitelist доменов из photo_search_profiles // - Фильтруем по min-size (отсев иконок) // - Считаем суточный лимит в Redis (ключ photo_search:count:YYYY-MM-DD) // // Если меняем провайдера (yandex → serpapi) — этот модуль будет адаптером, // логика квот и фильтрации профилей остаётся. const axios = require('axios'); const { XMLParser } = require('fast-xml-parser'); const Redis = require('ioredis'); const settings = require('./settings'); const config = require('../config'); const { query: dbQuery } = require('../config/db'); const YANDEX_ENDPOINT = 'https://searchapi.api.cloud.yandex.net/v2/image/search'; const MIN_DIMENSION_PX = 400; const USER_AGENT = 'Mozilla/5.0 (compatible; ZeroPost/1.0)'; let _redis = null; function getRedis() { if (!_redis) { _redis = new Redis({ host: config.redis.host, port: config.redis.port, lazyConnect: false, maxRetriesPerRequest: 3, }); _redis.on('error', (err) => console.error('[photo-search] redis error:', err.message)); } return _redis; } // ── Квоты (Redis daily counter) ────────────────────────────────────────────── function dailyKey() { return `photo_search:count:${new Date().toISOString().slice(0, 10)}`; } async function getDailyCount() { try { const v = await getRedis().get(dailyKey()); return parseInt(v) || 0; } catch { return 0; } } async function incrementDaily() { try { const r = getRedis(); const k = dailyKey(); const count = await r.incr(k); if (count === 1) await r.expire(k, 172800); // 48h TTL return count; } catch (err) { console.error('[photo-search] incr failed:', err.message); return 0; } } async function getQuotaStatus() { const limit = parseInt(await settings.get('YANDEX_SEARCH_DAILY_LIMIT', '300')); const used = await getDailyCount(); return { used, limit, remaining: Math.max(0, limit - used) }; } // ── Парсинг XML ответа Yandex ──────────────────────────────────────────────── const xmlParser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_', textNodeName: '#text', parseAttributeValue: false, trimValues: true, }); function parseYandexXml(base64Data) { const xmlText = Buffer.from(base64Data, 'base64').toString('utf-8'); const parsed = xmlParser.parse(xmlText); const response = parsed?.yandexsearch?.response; if (!response) { throw new Error('Unexpected Yandex response: no '); } if (response.error) { const errText = typeof response.error === 'object' ? response.error['#text'] || JSON.stringify(response.error) : response.error; throw new Error(`Yandex error: ${errText}`); } const grouping = response.results?.grouping; if (!grouping) return { total: 0, docs: [] }; const groups = Array.isArray(grouping.group) ? grouping.group : [grouping.group].filter(Boolean); const docs = []; for (const group of groups) { const groupDocs = Array.isArray(group.doc) ? group.doc : [group.doc].filter(Boolean); for (const doc of groupDocs) { const imgProps = doc['image-properties'] || {}; const titleText = typeof doc.title === 'object' ? (doc.title['#text'] || '') : (doc.title || ''); const passageText = typeof doc.passage === 'object' ? (doc.passage['#text'] || '') : (doc.passage || ''); docs.push({ imageUrl: imgProps['image-link'] || doc.url || null, thumbUrl: imgProps['thumbnail-link'] || null, sourceUrl: imgProps['html-link'] || null, sourceDomain: doc.domain || null, title: String(titleText).slice(0, 200), passage: String(passageText).slice(0, 200), width: parseInt(imgProps['original-width']) || 0, height: parseInt(imgProps['original-height']) || 0, thumbWidth: parseInt(imgProps['thumbnail-width']) || 0, thumbHeight: parseInt(imgProps['thumbnail-height']) || 0, }); } } const foundArr = Array.isArray(response.found) ? response.found : (response.found ? [response.found] : []); const foundAll = foundArr.find(f => f['@_priority'] === 'all'); const total = foundAll ? parseInt(foundAll['#text']) : docs.length; return { total, docs }; } // ── Фильтрация результатов ─────────────────────────────────────────────────── function matchesDomain(domain, whitelist) { if (!domain || !whitelist || whitelist.length === 0) return true; const d = domain.toLowerCase(); return whitelist.some(allowed => { const a = allowed.toLowerCase(); return d === a || d.endsWith('.' + a); }); } function meetsMinSize(doc) { if (!doc.width || !doc.height) return true; // unknown size — пропускаем return Math.min(doc.width, doc.height) >= MIN_DIMENSION_PX; } // ── Profile lookup ─────────────────────────────────────────────────────────── async function getProfileDomains(slug) { if (!slug) return []; const { rows } = await dbQuery( 'SELECT domains FROM photo_search_profiles WHERE slug=$1', [slug] ); return rows[0]?.domains || []; } // ── Main: searchByQuery ────────────────────────────────────────────────────── async function searchByQuery({ query, profileSlug = 'general', num = 6 }) { if (!query || typeof query !== 'string') { throw new Error('query is required'); } // Квота const limit = parseInt(await settings.get('YANDEX_SEARCH_DAILY_LIMIT', '300')); const used = await getDailyCount(); if (used >= limit) { const err = new Error(`Daily photo search limit reached: ${used}/${limit}`); err.code = 'DAILY_LIMIT_EXCEEDED'; throw err; } // Credentials const apiKey = await settings.get('YANDEX_SEARCH_API_KEY', ''); const folderId = await settings.get('YANDEX_SEARCH_FOLDER_ID', ''); if (!apiKey || !folderId) { throw new Error('Yandex Search API not configured (YANDEX_SEARCH_API_KEY / YANDEX_SEARCH_FOLDER_ID)'); } // Profile const domains = await getProfileDomains(profileSlug); // Запросим с запасом — потом отфильтруем const docsOnPage = Math.min(Math.max(num * 4, 10), 50); const requestBody = { query: { searchType: 'SEARCH_TYPE_RU', queryText: query.trim(), familyMode: 'FAMILY_MODE_MODERATE', page: '0', fixTypoMode: 'FIX_TYPO_MODE_ON', }, imageSpec: { format: 'IMAGE_FORMAT_UNSPECIFIED', size: 'IMAGE_SIZE_LARGE', orientation: 'IMAGE_ORIENTATION_UNSPECIFIED', color: 'IMAGE_COLOR_UNSPECIFIED', }, docsOnPage: String(docsOnPage), folderId, userAgent: USER_AGENT, }; await incrementDaily(); const startMs = Date.now(); let response; try { response = await axios.post(YANDEX_ENDPOINT, requestBody, { headers: { 'Authorization': `Api-Key ${apiKey}`, 'Content-Type': 'application/json', }, timeout: 20000, }); } catch (err) { const status = err.response?.status; const data = err.response?.data; const detail = data?.message || data?.code || err.message; const e = new Error(`Yandex Search API request failed (${status || 'no-response'}): ${detail}`); e.status = status; throw e; } const elapsedMs = Date.now() - startMs; if (!response.data?.rawData) { throw new Error('Yandex response missing rawData field'); } const { total, docs } = parseYandexXml(response.data.rawData); // Фильтрация let filtered = docs.filter(meetsMinSize); if (domains.length > 0) { filtered = filtered.filter(d => matchesDomain(d.sourceDomain, domains)); } // Дедуп по imageUrl (на всякий случай) const seen = new Set(); const dedup = []; for (const d of filtered) { if (!d.imageUrl || seen.has(d.imageUrl)) continue; seen.add(d.imageUrl); dedup.push(d); } const items = dedup.slice(0, num); return { items, total, raw_count: docs.length, filtered_count: filtered.length, elapsed_ms: elapsedMs, quota: { used: used + 1, limit, remaining: Math.max(0, limit - used - 1) }, profile: profileSlug, domains: domains, }; } module.exports = { searchByQuery, getQuotaStatus, parseYandexXml };