// opencode-browser-tool-insta.../scripts/google_es_puertas_rank.mjs
//
// 163 lines
// 5.1 KiB
// JavaScript

import path from "node:path"
import { BrowserManager } from "../dist/browser/manager.js"
// Shared browser automation handle used by every step of this script. The constructor
// receives the absolute path of ./artifacts (presumably the output directory for
// snapshots — confirm against BrowserManager).
const browser = new BrowserManager(path.resolve("./artifacts"))
// Search phrase typed into the Google search box.
const query = "puertas cortafuegos de madera"
// Domain whose position in the organic results is being looked up.
const targetDomain = "puertastecnicasbcn.com"
// Upper bound on how many results pages are walked before giving up.
const maxPages = 10
/**
 * Pauses the calling async flow.
 *
 * @param {number} ms - Delay in milliseconds before the promise settles.
 * @returns {Promise<void>} Promise that fulfils (with no value) once the timer fires.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
// Source text of a function that is handed to browser.evaluate() to scrape the
// organic results of the current page. The function:
//   - searches inside #search, or the whole document when that node is absent;
//   - keeps only anchors whose href starts with "http", skipping any href that
//     contains "google." or the web-cache host;
//   - requires a nested <h3> with non-empty text, which is used as the title;
//   - drops duplicate hrefs, keeping the first occurrence;
//   - returns an array of { href, title } in document order.
// NOTE: this is a runtime string, so comments must stay outside the template literal.
const organicScript = `() => {
const root = document.querySelector("#search") || document
const anchors = Array.from(root.querySelectorAll("a[href]"))
const results = []
for (const a of anchors) {
const href = a.getAttribute("href") || ""
if (!href.startsWith("http")) continue
if (href.includes("google.") || href.includes("webcache.googleusercontent.com")) continue
const h3 = a.querySelector("h3")
if (!h3) continue
const title = (h3.textContent || "").trim()
if (!title) continue
if (!results.some((x) => x.href === href)) {
results.push({ href, title })
}
}
return results
}`
/**
 * Reports whether `href` points at `domain` itself or at one of its subdomains.
 *
 * The comparison is made on the parsed hostname, so a look-alike host such as
 * "example.com.evil.org", or a URL that merely mentions the domain in its path
 * or query string, is not treated as a hit.
 *
 * @param {unknown} href - Result URL as scraped from the page.
 * @param {string} domain - Domain to look for, e.g. "example.com".
 * @returns {boolean} True only for an exact host match or a dot-separated subdomain.
 */
function hrefBelongsToDomain(href, domain) {
  const wanted = String(domain).trim().toLowerCase()
  if (!wanted) return false
  let hostname
  try {
    hostname = new URL(String(href)).hostname.toLowerCase()
  } catch {
    // An unparseable href could not be navigated to afterwards, so never report it as a hit.
    return false
  }
  return hostname === wanted || hostname.endsWith(`.${wanted}`)
}

/**
 * Tries to move the browser to the next results page.
 *
 * Known "next" selectors are tried first; when none of them can be clicked, a
 * text-based search for a "Siguiente"/"Next" control runs inside the page and
 * its outcome is stored on `pageEntry.nextFallback`.
 *
 * @param {object} pageEntry - The `out.pages` record of the page being left.
 * @returns {Promise<boolean>} True when a "next" control was clicked.
 */
async function goToNextResultsPage(pageEntry) {
  const selectors = ["#pnnext", "a#pnnext", "a[aria-label*='siguiente' i]", "a[aria-label*='next' i]"]
  for (const sel of selectors) {
    const ex = await browser.query({ selector: sel, mode: "exists" }).catch(() => ({ result: false }))
    if (ex?.result !== true) continue
    try {
      await browser.click(sel)
      return true
    } catch {
      // A failed click must not count as "moved": the same page would be scraped
      // again and its results added to the running rank a second time.
    }
  }
  const tried = await browser
    .evaluate(
      `() => {
const nodes = Array.from(document.querySelectorAll("a,button,[role=button]"))
for (const n of nodes) {
const t = (n.innerText || "").trim()
if (!t) continue
if (t === "Siguiente" || t.includes("Siguiente") || t === "Next" || t.includes("Next")) {
n.click()
return { clicked: true, text: t }
}
}
return { clicked: false, text: null }
}`,
      {},
    )
    .catch(() => ({ result: { clicked: false, text: null } }))
  pageEntry.nextFallback = tried?.result
  return Boolean(tried?.result?.clicked)
}

/**
 * Searches Google for `query`, walks up to `maxPages` results pages looking for
 * `targetDomain`, optionally opens the matching result, and prints a JSON report.
 *
 * The report (`out`) contains the raw responses of the browser steps, one entry
 * per visited results page in `out.pages`, and `out.result`, which is either the
 * hit ({ page, positionOnPage, globalRank, href, title }) or { found: false }.
 * `globalRank` is the 1-based position counted across all visited pages.
 *
 * @returns {Promise<void>} Settles after the browser was closed and the report printed.
 *   Rejects when a non-guarded browser step fails; the caller is expected to clean up.
 */
const run = async () => {
  const out = { pages: [] }
  out.open = await browser.open({
    headless: false,
    width: 1366,
    height: 900,
    browserKind: "testing",
    persistentProfile: true,
    userDataDir: "/home/pancho/.chromium-perfil-google",
    startUrl: "https://www.google.es",
    recordVideo: false,
  })
  out.cookies = await browser.handleConsent("accept").catch((e) => ({ error: e.message }))
  await browser.type('textarea[name="q"]', query)
  await browser.press("Enter")
  await browser.waitFor({ for: "timeout", value: "1200" })
  out.afterSearch = await browser.observe().catch((e) => ({ error: e.message }))
  // Google redirects to /sorry/index when it wants a human verification step.
  if ((out.afterSearch?.url || "").includes("/sorry/index")) {
    out.humanAttempt = await browser.handleHumanCheck().catch((e) => ({ error: e.message }))
    await browser.waitFor({ for: "timeout", value: "3000" })
    out.afterChallenge = await browser.observe().catch((e) => ({ error: e.message }))
  }

  let found = null
  // Number of organic results seen on the pages already left behind.
  let globalRank = 0
  for (let pageNo = 1; pageNo <= maxPages; pageNo += 1) {
    await browser.waitFor({ for: "timeout", value: "800" })
    const currentUrl = browser.getState().currentUrl || ""
    if (!currentUrl.includes("/search")) {
      out.pages.push({ page: pageNo, status: "not_on_search", url: currentUrl })
      break
    }
    // Scraping is best-effort: a failed evaluation is treated as an empty page.
    const extracted = await browser.evaluate(organicScript, {}).catch(() => ({ result: [] }))
    const organic = Array.isArray(extracted?.result) ? extracted.result : []
    const idx = organic.findIndex((x) => hrefBelongsToDomain(x?.href, targetDomain))
    const pageEntry = {
      page: pageNo,
      url: currentUrl,
      organicCount: organic.length,
      found: idx >= 0,
      positionOnPage: idx >= 0 ? idx + 1 : null,
    }
    out.pages.push(pageEntry)
    if (idx >= 0) {
      globalRank += idx + 1
      found = {
        page: pageNo,
        positionOnPage: idx + 1,
        globalRank,
        href: organic[idx].href,
        title: organic[idx].title,
      }
      break
    }
    globalRank += organic.length
    if (!(await goToNextResultsPage(pageEntry))) {
      break
    }
    await sleep(1400)
  }

  out.result = found || { found: false }
  if (found?.href) {
    await browser.navigate(found.href, "domcontentloaded")
    await browser.waitFor({ for: "timeout", value: "1000" })
    out.destinationObserve = await browser.observe().catch((e) => ({ error: e.message }))
    out.destinationSnapshot = await browser.snapshot({ label: "google-puertas-destination", fullPage: false })
  }
  out.finalObserve = await browser.observe().catch((e) => ({ error: e.message }))
  out.close = await browser.close()
  console.log(JSON.stringify(out, null, 2))
}
// Last-resort handler: report the failure, then try to capture a screenshot and
// close the browser (both best-effort) before exiting with a non-zero status.
run().catch(async (failure) => {
  // Runs one cleanup step and deliberately discards any error it raises, so a
  // broken browser cannot prevent the remaining steps or the final exit.
  const bestEffort = async (step) => {
    try {
      await step()
    } catch {
      // ignore
    }
  }

  console.error("GOOGLE_PUERTAS_ERROR", failure?.message || failure)
  await bestEffort(async () => {
    const shot = await browser.snapshot({ label: "google-puertas-error", fullPage: true })
    console.error("ERROR_SNAPSHOT", shot.filePath)
  })
  await bestEffort(() => browser.close())
  process.exit(1)
})