-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwebapp_scraper.py
More file actions
38 lines (31 loc) · 1.08 KB
/
webapp_scraper.py
File metadata and controls
38 lines (31 loc) · 1.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
### Downloads JavaScript generated content from web apps by using web browser
"""
import sys
from playwright.sync_api import sync_playwright, Error
from utils import InternetNode
def main():
    """Load ``node.url`` in Chromium via Playwright and store the rendered page.

    The module docstring is handed to ``InternetNode`` (presumably used as the
    tool description -- confirm in ``utils``). The page is opened with
    ``wait_until='networkidle'`` so that script-generated content has a chance
    to load, and then the page's final URL, title and HTML content are passed
    to ``node.insert`` followed by ``node.flush``.

    Raises:
        SystemExit: when no URL was provided, or with status 2 when
            navigation fails with ``net::ERR_ABORTED``.
        playwright.sync_api.Error: any other Playwright failure during page
            creation or navigation is re-raised instead of being silently
            ignored, so the caller never sees a "successful" empty run.
    """
    with InternetNode(__doc__) as node:
        if not node.url:
            raise SystemExit('Please provide at least one URL')
        with sync_playwright() as playwright:
            browser = playwright.chromium.launch()
            try:
                page = browser.new_page()
                page.goto(node.url, wait_until='networkidle')
            except Error as exc:
                if 'net::ERR_ABORTED' not in str(exc):
                    # Unknown failure (timeout, DNS, crash, ...): do not hide
                    # it behind a zero exit status.
                    raise
                # see https://github.com/puppeteer/puppeteer/issues/2794
                sys.stderr.write(f'Unable to download a file: {exc}\n')
                sys.exit(2)
            else:
                # Only reached when navigation succeeded.
                node.insert(
                    page.url,
                    page.title(),
                    page.content(),
                )
                node.flush()
            finally:
                # Runs on success, on re-raise and on sys.exit alike.
                browser.close()
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()