// Google search pagination probe: pages through results looking for a target domain.
import path from "node:path"

import { BrowserManager } from "../dist/browser/manager.js"

// Shared browser controller; it is handed the absolute path of ./artifacts
// (resolved against the current working directory, not this file's location).
const browser = new BrowserManager(path.resolve("./artifacts"))

// Upper bound on how many result pages the pagination loop will visit.
const maxPages = 8

/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

// Source text of an arrow function that is evaluated inside the page.
// It returns the raw `href` attribute of every <a href> element, dropping
// empty values. The text must stay a single valid function expression, so no
// stray separator characters may appear between its lines.
const linksScript = `() =>
  Array.from(document.querySelectorAll('a[href]'))
    .map((a) => a.getAttribute('href') || '')
    .filter(Boolean)`

// Source text of an arrow function that is evaluated inside the page to move
// to the next results page. It returns { clicked, text }, where `text`
// identifies what was clicked ('#pnnext' or the element's trimmed innerText)
// and is null when nothing was clicked.
//
// Lookup order, most specific first, so that an ordinary link whose text
// merely contains "Next"/"Siguiente" is not clicked while a real pager exists:
//   1. the element with id "pnnext";
//   2. a link/button whose trimmed text is exactly one of the labels;
//   3. as a last resort, a link/button whose text contains one of the labels.
const nextScript = `() => {
  const labels = ['Siguiente', 'Next']
  const press = (node, text) => {
    node.click()
    return { clicked: true, text }
  }

  const byId = document.querySelector('#pnnext')
  if (byId) return press(byId, '#pnnext')

  const candidates = Array.from(document.querySelectorAll('a,button,[role="button"]'))
    .map((node) => ({ node, text: (node.innerText || '').trim() }))
    .filter((candidate) => candidate.text)

  const hit =
    candidates.find((candidate) => labels.includes(candidate.text)) ||
    candidates.find((candidate) => labels.some((label) => candidate.text.includes(label)))

  return hit ? press(hit.node, hit.text) : { clicked: false, text: null }
}`

// Parse `value` as a URL (optionally relative to `base`); null when it cannot be parsed.
const parseUrl = (value, base) => {
  try {
    return new URL(value, base || undefined)
  } catch {
    // Unparseable href or base: the caller treats this as "no match".
    return null
  }
}

// True when `href` points at `domain` or one of its subdomains. Links of the
// form /url?q=<target> (or ?url=<target>) are unwrapped first so the wrapped
// destination is what gets compared, not the host serving the redirect.
const hrefTargetsDomain = (href, domain, baseUrl) => {
  let target = parseUrl(href, baseUrl)
  if (!target) return false
  if (target.pathname === "/url") {
    const wrapped = target.searchParams.get("q") ?? target.searchParams.get("url")
    target = (wrapped && parseUrl(wrapped)) || target
  }
  const host = target.hostname.toLowerCase()
  const wanted = domain.toLowerCase()
  return host === wanted || host.endsWith(`.${wanted}`)
}

/**
 * Open Google, submit a search and walk through up to `maxPages` result pages
 * looking for a link to the target domain. Everything observed along the way
 * is collected in one report object that is printed to stdout as JSON.
 *
 * The browser is only closed on the success path here; failures propagate to
 * the caller, which is expected to take an error snapshot and close it.
 *
 * @param {object} [options]
 * @param {string} [options.query] - Text typed into the search box.
 * @param {string} [options.targetDomain] - Domain searched for; subdomains also match.
 * @returns {Promise<void>}
 */
const run = async ({ query = "barranquismo granada", targetDomain = "barranquismogranada.com" } = {}) => {
  const out = { pages: [] }

  out.open = await browser.open({
    headless: false,
    width: 1366,
    height: 900,
    startUrl: "https://www.google.com",
    recordVideo: true,
    recordLabel: "google-pagination-v4",
  })

  // Consent handling is best effort: a failure is recorded, not fatal.
  out.consent = await browser.handleConsent("reject").catch((e) => ({ error: e.message }))
  await browser.type('textarea[name="q"]', query)
  await browser.press("Enter")

  out.observeStart = await browser.observe()
  // A "/sorry/index" URL means the search was intercepted; presumably this is
  // Google's human-verification page (confirm against BrowserManager docs).
  if ((out.observeStart.url || "").includes("/sorry/index")) {
    out.human = await browser.handleHumanCheck().catch((e) => ({ error: e.message }))
    await browser.waitFor({ for: "timeout", value: "4000" })
    out.observeAfterHuman = await browser.observe().catch((e) => ({ error: e.message }))
  }

  let found = null

  for (let p = 1; p <= maxPages; p += 1) {
    const url = browser.getState().currentUrl || ""
    if (!url.includes("/search")) {
      out.pages.push({ page: p, status: "not-on-search", url })
      break
    }

    const links = await browser.evaluate(linksScript, undefined)
    const normalized = Array.isArray(links.result) ? links.result.map(String) : []
    // Compare hostnames rather than raw substrings, so a link that only
    // mentions the domain in its query string is not reported as a hit.
    const matchIndex = normalized.findIndex((href) => hrefTargetsDomain(href, targetDomain, url))

    out.pages.push({
      page: p,
      url,
      links: normalized.length,
      found: matchIndex >= 0,
      // 1-based position among all collected links on the page.
      idx: matchIndex >= 0 ? matchIndex + 1 : null,
    })

    if (matchIndex >= 0) {
      found = { found: true, page: p, positionOnPage: matchIndex + 1, url }
      break
    }

    const nextTried = await browser.evaluate(nextScript, undefined)
    out.pages[out.pages.length - 1].next = nextTried.result
    if (!nextTried.result?.clicked) {
      break
    }
    // Fixed pause to give the next page time to load before it is inspected.
    await wait(1400)
  }

  // Both outcomes carry an explicit `found` flag so consumers can test one field.
  out.result = found || { found: false }
  out.finalObserve = await browser.observe().catch((e) => ({ error: e.message }))
  out.snapshot = await browser.snapshot({ label: "google-pagination-v4-final", fullPage: true }).catch((e) => ({
    error: e.message,
  }))
  out.close = await browser.close()

  console.log(JSON.stringify(out, null, 2))
}

// Entry point. On any failure: report it, try to capture a full-page snapshot
// for diagnosis, try to close the browser, then exit with a non-zero status.
// The snapshot and close steps are best effort; their own failures are
// logged rather than thrown so the remaining cleanup and the exit still happen.
run().catch(async (err) => {
  console.error("GOOGLE_PAGINATION_V4_ERROR", err?.message || err)
  try {
    const snap = await browser.snapshot({ label: "google-pagination-v4-error", fullPage: true })
    console.error("ERROR_SNAPSHOT", snap.filePath)
  } catch (snapErr) {
    console.error("ERROR_SNAPSHOT_FAILED", snapErr?.message || snapErr)
  }
  try {
    await browser.close()
  } catch (closeErr) {
    console.error("BROWSER_CLOSE_FAILED", closeErr?.message || closeErr)
  }
  process.exit(1)
})