246 lines
7.2 KiB
JavaScript
246 lines
7.2 KiB
JavaScript
import path from "node:path"
|
|
import { BrowserManager } from "../dist/browser/manager.js"
|
|
|
|
// Artifacts directory, resolved against the current working directory, that is
// handed to BrowserManager (presumably where snapshots end up — confirm in manager.js).
const artifactsDir = path.resolve("./artifacts")

// Single shared automation session used by every helper in this script.
const browser = new BrowserManager(artifactsDir)

// Domain whose position in the organic results is being looked up.
const targetDomain = "barranquismogranada.com"

// Upper bound on how many result pages are inspected before giving up.
const maxPages = 10

// Report accumulated during the run and printed as JSON at the end:
// `pages` holds one record per inspected results page, `debug` holds retry traces.
const out = { pages: [], debug: [] }
|
|
|
|
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) once the delay has elapsed.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
|
|
|
|
/**
 * Source text of a function that is evaluated inside the results page (it is
 * passed to `browser.evaluate`, so it must stay self-contained and may only
 * use browser globals).
 *
 * The function returns the organic results as `[{ href, title }]` in document
 * order. A link qualifies when:
 *   - its raw `href` attribute starts with "http",
 *   - it wraps an `<h3>` with non-empty text (used as the title),
 *   - it does not point at a Google-owned host, and
 *   - the same `href` has not been collected already.
 *
 * The Google filter looks at the parsed hostname (any label before the TLD
 * equal to "google", or the web-cache host) rather than searching the whole
 * URL for "google.": a substring test also discards unrelated sites whose
 * path or query string merely mentions Google, which corrupts the ranking.
 * If the URL cannot be parsed, the substring test is used as a fallback.
 *
 * The text deliberately contains no comments, backticks or backslashes so it
 * survives being shipped to the page as a plain string.
 */
const organicScript = `() => {
  const root = document.querySelector("#search") || document
  const isGoogleOwned = (href) => {
    try {
      const host = new URL(href).hostname.toLowerCase()
      if (host === "webcache.googleusercontent.com") return true
      return host.split(".").slice(0, -1).includes("google")
    } catch {
      return href.includes("google.") || href.includes("webcache.googleusercontent.com")
    }
  }
  const seen = new Set()
  const results = []
  for (const a of Array.from(root.querySelectorAll("a[href]"))) {
    const href = a.getAttribute("href") || ""
    if (!href.startsWith("http")) continue
    if (seen.has(href) || isGoogleOwned(href)) continue
    const h3 = a.querySelector("h3")
    if (!h3) continue
    const title = (h3.textContent || "").trim()
    if (!title) continue
    seen.add(href)
    results.push({ href, title })
  }
  return results
}`
|
|
|
|
/**
 * Source text of a function evaluated inside the page as a last resort for
 * moving to the next results page when none of the dedicated pager selectors
 * matched.
 *
 * It scans links, buttons and `[role=button]` elements with non-empty visible
 * text and clicks one of them:
 *   1. preferably the first element whose trimmed text is exactly "Siguiente"
 *      or "Next";
 *   2. otherwise the first element whose text merely contains one of those
 *      labels.
 * Preferring the exact label keeps an earlier, unrelated control (for example
 * a button reading "Next steps ...") from being clicked instead of the pager.
 *
 * Returns `{ clicked: true, text }` for the clicked element, or
 * `{ clicked: false, text: null }` when nothing matched.
 *
 * The text deliberately contains no comments, backticks or backslashes so it
 * survives being shipped to the page as a plain string.
 */
const nextFallbackScript = `() => {
  const labels = ["Siguiente", "Next"]
  const candidates = []
  for (const node of Array.from(document.querySelectorAll("a,button,[role=button]"))) {
    const text = (node.innerText || "").trim()
    if (text) candidates.push({ node, text })
  }
  const hit =
    candidates.find((c) => labels.includes(c.text)) ||
    candidates.find((c) => labels.some((label) => c.text.includes(label)))
  if (!hit) return { clicked: false, text: null }
  hit.node.click()
  return { clicked: true, text: hit.text }
}`
|
|
|
|
/**
 * Evaluate a script in the page, retrying when the evaluation throws.
 *
 * Every failed attempt appends `<tag>:eval-retry-<n>` to `out.debug`. Between
 * attempts the function waits briefly; no wait happens after the last attempt
 * because there is nothing left to retry.
 *
 * @param {string} script - Source text of the function to evaluate in the page.
 * @param {*} arg - Argument forwarded to `browser.evaluate` alongside the script.
 * @param {string} tag - Prefix for the debug entries written on failure.
 * @returns {Promise<*>} Whatever `browser.evaluate` resolves with, or
 *   `{ result: null }` when every attempt failed.
 */
const safeEval = async (script, arg, tag) => {
  const maxAttempts = 12
  const retryDelayMs = 450

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      return await browser.evaluate(script, arg)
    } catch {
      // Best effort: the failure is only traced, never propagated.
      out.debug.push(`${tag}:eval-retry-${attempt}`)
      if (attempt < maxAttempts) await sleep(retryDelayMs)
    }
  }

  return { result: null }
}
|
|
|
|
/**
 * Call `browser.observe()`, retrying when it throws.
 *
 * Every failed attempt appends `<tag>:observe-retry-<n>` to `out.debug`.
 * Between attempts the function waits briefly; no wait happens after the last
 * attempt because there is nothing left to retry.
 *
 * @param {string} tag - Prefix for the debug entries written on failure.
 * @returns {Promise<*>} Whatever `browser.observe()` resolves with, or
 *   `{ url, failed: true }` (with the URL taken from `browser.getState()`)
 *   when every attempt failed.
 */
const safeObserve = async (tag) => {
  const maxAttempts = 8
  const retryDelayMs = 450

  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      return await browser.observe()
    } catch {
      // Best effort: the failure is only traced, never propagated.
      out.debug.push(`${tag}:observe-retry-${attempt}`)
      if (attempt < maxAttempts) await sleep(retryDelayMs)
    }
  }

  return { url: browser.getState().currentUrl, failed: true }
}
|
|
|
|
/**
 * Scroll the page vertically without ever throwing.
 *
 * @param {number} y - Vertical offset forwarded to `browser.scroll` as `{ y }`.
 * @returns {Promise<*>} The value `browser.scroll` resolves with, or
 *   `{ movedY: 0 }` when the scroll failed.
 */
const safeScroll = async (y) => {
  let outcome
  try {
    outcome = await browser.scroll({ y })
  } catch {
    // A failed scroll is reported as "did not move".
    outcome = { movedY: 0 }
  }
  return outcome
}
|
|
|
|
/** Turn a rejection into a serialisable record so a best-effort step never aborts the run. */
const asErrorRecord = (err) => ({ error: err?.message ?? String(err) })

/**
 * Tell whether a result URL belongs to `targetDomain`.
 *
 * The hostname must be the domain itself or one of its subdomains; a plain
 * substring test would also accept look-alike hosts such as
 * `<targetDomain>.evil.org`. URLs that cannot be parsed fall back to a
 * substring test on the raw text.
 */
const isTargetHref = (href) => {
  const raw = String(href)
  try {
    const host = new URL(raw).hostname.toLowerCase()
    return host === targetDomain || host.endsWith(`.${targetDomain}`)
  } catch {
    return raw.includes(targetDomain)
  }
}

/** Report whether `selector` currently matches something; query failures count as "no". */
const selectorExists = async (selector) => {
  try {
    const res = await browser.query({ selector, mode: "exists" })
    return res?.result === true
  } catch {
    return false
  }
}

/** Click `selector` and report whether the click went through instead of throwing. */
const tryClick = async (selector) => {
  try {
    await browser.click(selector)
    return true
  } catch {
    return false
  }
}

// Pager selectors tried in order before resorting to the text-based fallback script.
const NEXT_PAGE_SELECTORS = ["#pnnext", "a#pnnext", "a[aria-label*='siguiente' i]", "a[aria-label*='next' i]"]

/**
 * Move to the next results page. A selector only counts when its click really
 * succeeded; otherwise the remaining selectors and finally the in-page
 * fallback are tried. The fallback outcome is stored on `pageRecord.nextFallback`.
 * Resolves to true when some click was performed.
 */
const clickNextPage = async (pageNo, pageRecord) => {
  for (const selector of NEXT_PAGE_SELECTORS) {
    if (!(await selectorExists(selector))) continue
    if (await tryClick(selector)) return true
    out.debug.push(`next-p${pageNo}:click-failed:${selector}`)
  }

  const tried = await safeEval(nextFallbackScript, {}, `next-p${pageNo}`)
  pageRecord.nextFallback = tried?.result
  return Boolean(tried?.result?.clicked)
}

/** Click the result link whose `href` attribute equals `href`: by selector first, then from inside the page. */
const clickResultLink = async (href) => {
  // Inside a quoted CSS string both the backslash and the double quote must be
  // escaped, backslash first so the escapes added for quotes are left alone.
  const escaped = String(href).replace(/\\/g, "\\\\").replace(/"/g, '\\"')
  const byHrefSelector = `a[href="${escaped}"]`

  if ((await selectorExists(byHrefSelector)) && (await tryClick(byHrefSelector))) return true

  const clickRes = await safeEval(
    `(arg) => {
      const links = Array.from(document.querySelectorAll("#search a[href], a[href]"))
      const t = links.find((a) => (a.getAttribute("href") || "") === arg.href)
      if (!t) return { clicked: false }
      t.click()
      return { clicked: true }
    }`,
    { href },
    "click-target",
  )
  return Boolean(clickRes?.result?.clicked)
}

/** Scroll down in steps until the page stops moving (or 50 steps were made); returns `{ loops, lastMoved }`. */
const scrollToBottom = async () => {
  let loops = 0
  let lastMoved = 1
  while (loops < 50 && lastMoved !== 0) {
    const step = await safeScroll(420)
    // A missing or non-numeric distance counts as "no movement" so the loop ends.
    lastMoved = Number(step?.movedY) || 0
    await sleep(220)
    loops += 1
  }
  return { loops, lastMoved }
}

/**
 * Entry point of the script.
 *
 * Steps, all recorded on the shared `out` report:
 *   1. open the browser on google.es, answer the consent dialog and search for
 *      "barranquismo granada";
 *   2. if the search landed on the "/sorry/index" interstitial, run the human
 *      check and take before/after snapshots;
 *   3. walk up to `maxPages` result pages looking for `targetDomain`, keeping a
 *      running rank across pages;
 *   4. when found, open the result (click, or direct navigation as a fallback),
 *      scroll it to the bottom and snapshot it;
 *   5. take a final observation and snapshot, close the browser and print `out`
 *      as JSON.
 *
 * @returns {Promise<void>} Rejects when a non-best-effort browser call fails;
 *   the caller is expected to handle that rejection.
 */
const run = async () => {
  out.open = await browser.open({
    headless: false,
    width: 1366,
    height: 900,
    browserKind: "testing",
    persistentProfile: true,
    // NOTE(review): machine-specific profile path; adjust when running elsewhere.
    userDataDir: "/home/pancho/.chromium-perfil-google",
    startUrl: "https://www.google.es",
    recordVideo: false,
    recordLabel: "google-es-organic-click-scroll-v8d",
  })

  out.cookies = await browser.handleConsent("accept").catch(asErrorRecord)
  await browser.type('textarea[name="q"]', "barranquismo granada")
  await browser.press("Enter")
  await browser.waitFor({ for: "timeout", value: "1200" })

  out.afterSearch = await safeObserve("afterSearch")

  if (String(out.afterSearch?.url ?? "").includes("/sorry/index")) {
    out.humanAttempt = await browser.handleHumanCheck().catch(asErrorRecord)
    out.before3s = await browser.snapshot({ label: "google-v8d-before-3s", fullPage: true }).catch(asErrorRecord)
    await browser.waitFor({ for: "timeout", value: "3000" })
    out.after3sObserve = await safeObserve("after3s")
    out.after3s = await browser.snapshot({ label: "google-v8d-after-3s", fullPage: true }).catch(asErrorRecord)
  }

  let found = null
  let globalRank = 0

  for (let pageNo = 1; pageNo <= maxPages; pageNo += 1) {
    await browser.waitFor({ for: "timeout", value: "700" })
    const currentUrl = browser.getState().currentUrl || ""
    if (!currentUrl.includes("/search")) {
      out.pages.push({ page: pageNo, status: "not_on_search", url: currentUrl })
      break
    }

    const extracted = await safeEval(organicScript, {}, `organic-p${pageNo}`)
    const organic = Array.isArray(extracted?.result) ? extracted.result : []
    const idx = organic.findIndex((entry) => isTargetHref(entry?.href))
    const isHit = idx >= 0

    const pageRecord = {
      page: pageNo,
      url: currentUrl,
      organicCount: organic.length,
      found: isHit,
      positionOnPage: isHit ? idx + 1 : null,
    }
    out.pages.push(pageRecord)

    if (isHit) {
      globalRank += idx + 1
      found = {
        page: pageNo,
        positionOnPage: idx + 1,
        globalRank,
        href: organic[idx].href,
        title: organic[idx].title,
      }
      break
    }

    globalRank += organic.length
    await safeScroll(680)
    await sleep(180)
    await safeScroll(-240)

    // Stop when no pager control could be clicked: staying on the same page
    // would count its results again on the next iteration.
    if (!(await clickNextPage(pageNo, pageRecord))) break

    await sleep(1200)
  }

  out.result = found || { found: false }

  if (found?.href) {
    const clicked = await clickResultLink(found.href)
    out.clickTarget = { clicked }
    if (!clicked) {
      await browser.navigate(found.href, "domcontentloaded")
      out.clickFallbackNavigate = true
    }

    await browser.waitFor({ for: "timeout", value: "1000" })
    out.destinationObserve = await safeObserve("destination")

    out.scrollRun = await scrollToBottom()
    out.destinationSnapshot = await browser
      .snapshot({ label: "google-v8d-destination-bottom", fullPage: true })
      .catch(asErrorRecord)
  }

  out.finalObserve = await safeObserve("final")
  out.finalSnapshot = await browser.snapshot({ label: "google-v8d-final", fullPage: true }).catch(asErrorRecord)
  out.close = await browser.close()
  console.log(JSON.stringify(out, null, 2))
}
|
|
|
|
// Start the script. If it rejects: log the failure, try to capture an error
// snapshot, try to close the browser, then exit with status 1.
run().catch(async (err) => {
  // Runs a clean-up step and discards any error it raises.
  const attempt = async (step) => {
    try {
      await step()
    } catch {
      // ignore
    }
  }

  console.error("GOOGLE_V8D_ERROR", err?.stack || err?.message || err)

  await attempt(async () => {
    const { filePath } = await browser.snapshot({ label: "google-v8d-error", fullPage: true })
    console.error("ERROR_SNAPSHOT", filePath)
  })
  await attempt(() => browser.close())

  process.exit(1)
})
|