|
1 | 1 | const cheerio = require("cheerio"); |
2 | | -const { fetchText } = require("../../common/utils"); |
3 | | -const { domain } = require("./attributes"); |
| 2 | +const getPageWithPlaywright = require("../../common/get-page-with-playwright"); |
| 3 | +const { id, domain } = require("./attributes"); |
| 4 | + |
/**
 * Reports whether an HTML document contains one of the known Cloudflare
 * challenge markers, i.e. looks like a challenge interstitial rather than
 * the page that was actually requested.
 *
 * @param {string} html - Page markup to inspect.
 * @returns {boolean} `true` when any challenge marker is present; `false`
 *   otherwise, including when `html` is not a string.
 */
function isCloudflareChallenge(html) {
  // Guard against null/undefined/non-string input, which would otherwise
  // throw a TypeError on `.includes` instead of answering the question.
  if (typeof html !== "string") {
    return false;
  }

  const challengeMarkers = [
    "challenges.cloudflare.com",
    "cf_chl_opt",
    "Enable JavaScript and cookies to continue",
  ];
  return challengeMarkers.some((marker) => html.includes(marker));
}
4 | 12 |
|
/**
 * Retrieves the raw HTML of the film programme listing page and of every
 * movie page it links to, using a Playwright-driven page.
 *
 * If the listing page turns out to be a Cloudflare challenge, a warning is
 * logged and an empty result is produced instead of scraping it.
 *
 * @returns {Promise<{movieListPage: string, moviePages: Object<string, string>}>}
 *   The listing page markup plus a map from each linked href to that page's
 *   markup. (Presumably `getPageWithPlaywright` resolves with the callback's
 *   return value - confirm against that helper.)
 */
async function retrieve() {
  const movieListPageUrl = `${domain}/search_film_programmes/`;

  return getPageWithPlaywright(movieListPageUrl, id, async (page) => {
    await page.waitForLoadState();
    const movieListPage = await page.content();

    if (isCloudflareChallenge(movieListPage)) {
      console.log(
        " - ⚠️ Cloudflare challenge detected - falling back to sourced events",
      );
      return { movieListPage: "", moviePages: {} };
    }

    const $ = cheerio.load(movieListPage);

    // A Set de-duplicates links that appear more than once in the listing.
    const moviePageUrls = new Set();
    $(".inner_block_3 a").each(function () {
      const url = $(this).attr("href");
      // Anchors without an href yield undefined; navigating to that would
      // fail and abort the whole retrieval, so such links are skipped.
      if (url) {
        moviePageUrls.add(url);
      }
    });

    // Pages are visited one at a time because they all share the same
    // Playwright page object.
    const moviePages = {};
    for (const moviePageUrl of moviePageUrls) {
      await page.goto(moviePageUrl);
      await page.waitForLoadState("networkidle");
      moviePages[moviePageUrl] = await page.content();
    }

    return { movieListPage, moviePages };
  });
}
27 | 44 |
|
28 | 45 | module.exports = retrieve; |
0 commit comments