// opencode-browser-tool-insta.../scripts/google_pagination_v4.mjs
//
// 113 lines
// 3.3 KiB
// JavaScript

import path from "node:path"
import { BrowserManager } from "../dist/browser/manager.js"
const browser = new BrowserManager(path.resolve("./artifacts"))
const maxPages = 8
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms))
const linksScript = `() =>
Array.from(document.querySelectorAll('a[href]'))
.map((a) => a.getAttribute('href') || '')
.filter(Boolean)`
const nextScript = `() => {
const labels = ['Siguiente', 'Next']
const nodes = Array.from(document.querySelectorAll('a,button,[role="button"]'))
for (const n of nodes) {
const t = (n.innerText || '').trim()
if (!t) continue
if (labels.some((k) => t === k || t.includes(k))) {
n.click()
return { clicked: true, text: t }
}
}
const byId = document.querySelector('#pnnext')
if (byId) {
byId.click()
return { clicked: true, text: '#pnnext' }
}
return { clicked: false, text: null }
}`
const run = async () => {
const out = { pages: [] }
out.open = await browser.open({
headless: false,
width: 1366,
height: 900,
startUrl: "https://www.google.com",
recordVideo: true,
recordLabel: "google-pagination-v4",
})
out.consent = await browser.handleConsent("reject").catch((e) => ({ error: e.message }))
await browser.type('textarea[name="q"]', "barranquismo granada")
await browser.press("Enter")
out.observeStart = await browser.observe()
if ((out.observeStart.url || "").includes("/sorry/index")) {
out.human = await browser.handleHumanCheck().catch((e) => ({ error: e.message }))
await browser.waitFor({ for: "timeout", value: "4000" })
out.observeAfterHuman = await browser.observe().catch((e) => ({ error: e.message }))
}
let found = null
for (let p = 1; p <= maxPages; p += 1) {
const url = browser.getState().currentUrl || ""
if (!url.includes("/search")) {
out.pages.push({ page: p, status: "not-on-search", url })
break
}
const links = await browser.evaluate(linksScript, undefined)
const normalized = Array.isArray(links.result) ? links.result.map(String) : []
const matchIndex = normalized.findIndex((href) => href.includes("barranquismogranada.com"))
out.pages.push({
page: p,
url,
links: normalized.length,
found: matchIndex >= 0,
idx: matchIndex >= 0 ? matchIndex + 1 : null,
})
if (matchIndex >= 0) {
found = { page: p, positionOnPage: matchIndex + 1, url }
break
}
const nextTried = await browser.evaluate(nextScript, undefined)
out.pages[out.pages.length - 1].next = nextTried.result
if (!nextTried.result?.clicked) {
break
}
await wait(1400)
}
out.result = found || { found: false }
out.finalObserve = await browser.observe().catch((e) => ({ error: e.message }))
out.snapshot = await browser.snapshot({ label: "google-pagination-v4-final", fullPage: true }).catch((e) => ({
error: e.message,
}))
out.close = await browser.close()
console.log(JSON.stringify(out, null, 2))
}
run().catch(async (err) => {
console.error("GOOGLE_PAGINATION_V4_ERROR", err?.message || err)
try {
const snap = await browser.snapshot({ label: "google-pagination-v4-error", fullPage: true })
console.error("ERROR_SNAPSHOT", snap.filePath)
} catch {
// ignore
}
try {
await browser.close()
} catch {
// ignore
}
process.exit(1)
})