2024-10-24 07:14:00 -04:00
|
|
|
import checkSanitizeOptions from './checkSanitizeOptions.mjs'
|
|
|
|
import Scrape from './scrape.mjs'
|
2024-11-04 04:09:11 -05:00
|
|
|
import { fileURLToPath } from 'node:url'
|
2024-10-24 07:14:00 -04:00
|
|
|
|
|
|
|
// Unpack the two helpers used below from the default export of ./scrape.mjs.
const { getAllPagesAndCache, scrapeAndCachePage } = Scrape
|
2021-05-07 09:16:12 -04:00
|
|
|
|
|
|
|
/**
 * Runs `checkSanitizeOptions` over every page returned for a wiki base URL.
 *
 * The base URL is taken from the last command-line argument. The page list is
 * obtained with `getAllPagesAndCache`; each page is then loaded with
 * `scrapeAndCachePage` and its title and text are handed to
 * `checkSanitizeOptions`. Pages are processed sequentially, in list order.
 *
 * @returns {Promise<void>}
 * @throws {Error} Usage error when the last argument does not start with 'http'.
 * @throws Whatever is thrown while processing a page; the failure is logged
 *   first and the remaining pages are not processed.
 */
async function main() {
  // Read the last argument by index instead of pop(): process.argv is shared
  // global state and must not be altered as a side effect of parsing it.
  const baseUrl = process.argv[process.argv.length - 1]
  if (!baseUrl.startsWith('http')) {
    throw new Error(
      'Usage: node scripts/learn/checkSanitize/index.mjs https://LEARN_WIKI'
    )
  }

  const pages = await getAllPagesAndCache(baseUrl)

  for (const page of pages) {
    try {
      const parsed = await scrapeAndCachePage(baseUrl, page)
      const { title } = parsed
      // A result without a `text` property is checked as an empty string.
      const text = parsed.text ? parsed.text['*'] : ''
      checkSanitizeOptions(page, title, text)
    } catch (e) {
      // Log which page failed before aborting the whole run.
      console.error('---')
      console.error(page, e)
      throw e
    }
  }
}
|
|
|
|
|
2024-10-24 07:14:00 -04:00
|
|
|
// Run main() only when this module is the script passed to node, i.e. when
// its own file path equals process.argv[1].
if (process.argv[1] === fileURLToPath(import.meta.url)) {
  // Exit status: 0 when main() resolves, 1 when it rejects.
  let exitCode = 0
  try {
    await main()
  } catch (error) {
    console.error(error)
    exitCode = 1
  }
  // Exit explicitly in both cases, as the original did.
  process.exit(exitCode)
}
|