-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathWebScrapingFB.py
More file actions
70 lines (51 loc) · 1.84 KB
/
WebScrapingFB.py
File metadata and controls
70 lines (51 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
######################### API STUFF
import os

import requests
# Start a run of the Apify Facebook posts scraper for one Facebook page.
#
# The Apify API token is read from the APIFY_API_TOKEN environment variable so
# the secret never lives in source control.
# SECURITY: any token that has been committed to a repository should be treated
# as leaked and rotated in the Apify console.
API_TOKEN = os.environ.get('APIFY_API_TOKEN')
if not API_TOKEN:
    raise SystemExit(
        'Set the APIFY_API_TOKEN environment variable to your Apify API token.'
    )
# ID of the actor to run
ACTOR_ID = 'apify~facebook-posts-scraper'
# The Facebook page you want to scrape
START_URL = 'https://www.facebook.com/noticiasdeaveiro/?locale=pt_PT'
# API endpoint to trigger the scraper
url = f'https://api.apify.com/v2/acts/{ACTOR_ID}/runs'
# The token travels in a bearer header rather than the query string, so it
# does not end up in URLs that get logged or shared.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {API_TOKEN}',
}
# Body (scraper configuration)
payload = {
    "startUrls": [
        {"url": START_URL},
    ],
    "maxPosts": 10,  # Max posts to scrape
    "includeComments": False,  # Set to True if you want comments
}
# Make the request to start the scraper; the timeout keeps the script from
# hanging indefinitely if the API does not answer.
response = requests.post(url, headers=headers, json=payload, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error body.
response.raise_for_status()
# Parsed JSON body of the run-start response
data = response.json()
# Download the items of an Apify dataset as parsed JSON into `data`.
#
# NOTE(review): this dataset ID is hard-coded and is not taken from the run
# started earlier in the script - confirm that reading a fixed dataset is
# intended.
DATASET_ID = 'HZ2Pb7WSKtphMNyKX'
# The Apify API token is read from the APIFY_API_TOKEN environment variable so
# the secret never lives in source control.
# SECURITY: any token that has been committed to a repository should be treated
# as leaked and rotated in the Apify console.
API_TOKEN = os.environ.get('APIFY_API_TOKEN')
if not API_TOKEN:
    raise SystemExit(
        'Set the APIFY_API_TOKEN environment variable to your Apify API token.'
    )
# API endpoint to get dataset results
url = f'https://api.apify.com/v2/datasets/{DATASET_ID}/items'
# Make the request to fetch the results. The token is sent as a bearer header
# (not in the URL), and the timeout prevents an indefinite hang.
response = requests.get(
    url,
    headers={'Authorization': f'Bearer {API_TOKEN}'},
    timeout=30,
)
# Fail loudly on an HTTP error instead of iterating over an error body later.
response.raise_for_status()
# Get the data in JSON format
data = response.json()
# Print the text and link-preview fields of every scraped post.
#
# Each entry is (label, key, fallback): the label is prepended to the value
# stored under `key`; the fallback is printed when the value is unavailable.
_POST_FIELDS = (
    ('text:', 'text', 'noText'),
    ('previewTitle: ', 'previewTitle', 'noPreviewTitle'),
    ('previewDescription: ', 'previewDescription', 'noPreviewDescription'),
)

for post in data:
    print("-------------------------------------------------------------------------------------------")
    for label, key, fallback in _POST_FIELDS:
        try:
            line = label + post[key]
        except (KeyError, TypeError):
            # KeyError: the post has no such field.
            # TypeError: the value is not a string (e.g. None), or the item
            # is not a dict at all.
            # Anything else (Ctrl-C, real bugs) is deliberately not swallowed.
            line = fallback
        print(line)