A Server that Returns Website Screenshots Using Headless Chromium with Bottle and Gevent

2021-08-20 21:27 (3 years ago) ytyng

This is the code for a web application that takes a URL passed as a query parameter of the request, loads that URL in headless Chromium, takes a screenshot of the page, and returns the image as the response.

The service is provided using Python, Bottle, and gevent.

from gevent import monkey
monkey.patch_all()

import hashlib
import os
import subprocess

from bottle import route, run, request, abort, HTTPResponse

# Directory that screenshot PNGs are written to. Files found here are served
# again on later requests for the same URL, so it also acts as the cache.
thumbnail_dir = '/tmp/web-thumbnail'

# Path of the placeholder image served when no screenshot file exists after a
# capture attempt. NOTE: despite the "_dir" suffix this is a file path.
veil_file_dir = os.path.join(os.path.dirname(__file__), 'static/veil.png')


def get_screenshot_filename(url):
    """
    Return the path of the screenshot file that belongs to ``url``.

    The file name is the SHA-256 hex digest of the UTF-8 encoded URL
    (characters that cannot be encoded are dropped) followed by ``.png``,
    placed inside ``thumbnail_dir``.
    """
    encoded_url = url.encode('utf-8', 'ignore')
    digest = hashlib.sha256(encoded_url).hexdigest()
    file_name = '{}.png'.format(digest)
    return os.path.join(thumbnail_dir, file_name)


def take_shapshot(url, *, timeout=None):
    """
    Generates a snapshot using headless Chromium

    Runs ``/usr/bin/chromium-browser`` in headless mode with a 400x400
    window and asks it to write a PNG screenshot of ``url`` to the path
    returned by ``get_screenshot_filename(url)``.

    :param url: Address of the page to capture. It is passed to Chromium as
        a command-line argument, so it must not look like a switch.
    :param timeout: Optional number of seconds to wait for Chromium to exit.
        ``None`` (the default) waits indefinitely.
    :return: ``0``; ``subprocess.check_call`` only returns on a zero exit
        status.
    :raises ValueError: if ``url`` starts with ``-``.
    :raises subprocess.CalledProcessError: if Chromium exits with a non-zero
        status.
    :raises subprocess.TimeoutExpired: if ``timeout`` is given and elapses.
    """
    # A value such as "--remote-debugging-port=9222" would be parsed by
    # Chromium as a switch instead of a page address, so refuse it up front.
    if url.startswith('-'):
        raise ValueError('url must not start with "-": {!r}'.format(url))

    output_path = get_screenshot_filename(url)
    # Make sure the output directory exists so Chromium can write the file
    # on a fresh host.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    command = [
        '/usr/bin/chromium-browser',
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        '--no-zygote',
        # '--disable-software-rasterizer',
        '--screenshot={}'.format(output_path),
        '--hide-scrollbars',
        '--window-size=400,400',
        '--lang=ja-JP',
        url,
    ]

    return subprocess.check_call(command, timeout=timeout)


@route('/')
def index():
    """
    Thumbnail generation page

    Reads the target address from the ``u`` query parameter and responds
    with a PNG image of that page. An existing screenshot file for the same
    URL is reused; otherwise headless Chromium is asked to create one. When
    no screenshot file exists afterwards, the placeholder image at
    ``veil_file_dir`` is returned instead.

    Diagnostic response headers:

    * ``X-CacheFileResult`` - set when a cached file was reused, or when no
      screenshot file could be found after the capture attempt.
    * ``X-TakeResult`` - exit status of the Chromium process when a capture
      was attempted.

    Responds with HTTP 400 when ``u`` is missing or is not an http/https URL.
    """
    url = request.query.u
    if not url:
        # abort() raises an HTTPError, so execution never continues past it.
        abort(400, 'Please specify parameter u')
    # The value is untrusted and ends up on Chromium's command line. Only
    # accept web addresses so local files (file://), browser-internal pages
    # or strings that look like command-line switches never reach it.
    if not url.lower().startswith(('http://', 'https://')):
        abort(400, 'Parameter u must be an http or https URL')

    append_headers = []
    screenshot_filename = get_screenshot_filename(url)
    if os.path.exists(screenshot_filename):
        append_headers.append(
            ('X-CacheFileResult', f'Cache file found:{screenshot_filename}'))
    else:
        try:
            ret = take_shapshot(url)
        except subprocess.CalledProcessError as e:
            # Chromium exited with an error. Report its exit status and fall
            # through to the placeholder image instead of failing the request.
            append_headers.append(('X-TakeResult', str(e.returncode)))
        else:
            append_headers.append(('X-TakeResult', str(ret)))

    if os.path.exists(screenshot_filename):
        with open(screenshot_filename, 'rb') as fp:
            data = fp.read()
    else:
        # Could not create the file
        with open(veil_file_dir, 'rb') as fp:
            data = fp.read()
        append_headers.append(
            ('X-CacheFileResult', f'NotFound:{screenshot_filename}'))

    r = HTTPResponse(status=200, body=data)
    r.set_header('Content-type', 'image/png')
    for header_name, header_value in append_headers:
        r.set_header(header_name, header_value)
    return r


# Start the Bottle application on all network interfaces, port 8080, using the
# gevent server backend. There is no ``__main__`` guard, so this call is made
# whenever the module is executed or imported.
run(host='0.0.0.0', port=8080, server='gevent')
Currently unrated
The author runs the application development company Cyberneura.
We look forward to discussing your development needs.

Archive

2025
2024
2023
2022
2021
2020
2019
2018
2017
2016
2015
2014
2013
2012
2011