A Server that Returns Website Screenshots Using Headless Chromium with Bottle and Gevent

2021-08-20 21:27 (3 years ago) ytyng

This is the code for a web application that takes a URL passed as a query parameter in the request URL, opens it in headless Chromium, takes a screenshot, and returns the screenshot as a PNG image in the response.

The service is provided using Python, Bottle, and gevent.

from gevent import monkey
monkey.patch_all()

import hashlib
import os
import subprocess

from bottle import route, run, request, abort, HTTPResponse

# Directory in which generated screenshots are stored; files inside are
# named after the SHA-256 digest of the requested URL.
thumbnail_dir = '/tmp/web-thumbnail'

# Path of the placeholder PNG that is served when no screenshot file
# exists for a URL. Despite the "_dir" suffix this is a file path.
_module_dir = os.path.dirname(__file__)
veil_file_dir = os.path.join(_module_dir, 'static/veil.png')


def get_screenshot_filename(url):
    """
    Return the path of the screenshot file that belongs to ``url``.

    The file name is the SHA-256 hex digest of the UTF-8 encoded URL
    (characters that cannot be encoded are dropped) followed by
    ``.png``, and the file lives directly under ``thumbnail_dir``.
    """
    encoded_url = url.encode('utf-8', 'ignore')
    digest = hashlib.sha256(encoded_url).hexdigest()
    basename = '{}.png'.format(digest)
    return os.path.join(thumbnail_dir, basename)


def take_shapshot(url):
    """
    Generates a snapshot using headless Chromium

    The image is written to the path returned by
    ``get_screenshot_filename(url)``; the directory that holds it is
    created first when it does not exist yet.

    :param url: Address of the page to capture.
    :return: The exit status of Chromium. ``subprocess.check_call``
        raises for any non-zero status, so a returned value is 0.
    :raises subprocess.CalledProcessError: If Chromium exits with a
        non-zero status.
    """
    screenshot_filename = get_screenshot_filename(url)
    # Make sure the output directory exists before Chromium is asked to
    # write the screenshot into it.
    os.makedirs(os.path.dirname(screenshot_filename), exist_ok=True)

    command = [
        '/usr/bin/chromium-browser',
        '--no-sandbox',
        '--headless',
        '--disable-gpu',
        '--no-zygote',
        # '--disable-software-rasterizer',
        '--screenshot={}'.format(screenshot_filename),
        '--hide-scrollbars',
        '--window-size=400,400',
        '--lang=ja-JP',
        # "--" ends switch parsing, so a caller-supplied url that starts
        # with "-" is treated as the page to load and never as an extra
        # Chromium switch.
        '--',
        url,
    ]

    return subprocess.check_call(command)


@route('/')
def index():
    """
    Thumbnail generation page

    Reads the page address from the ``u`` query parameter and answers
    with a PNG image. If a screenshot file for that address already
    exists it is served as is; otherwise a snapshot is taken first.
    When no screenshot file exists afterwards, the placeholder image at
    ``veil_file_dir`` is returned instead (still with status 200).

    Diagnostic details are exposed through the ``X-CacheFileResult``
    and ``X-TakeResult`` response headers.

    Responds with 400 when ``u`` is missing or is not an http(s) URL.
    """
    url = request.query.u
    if not url:
        # abort() raises, so no explicit return is needed afterwards.
        abort(400, 'Please specify parameter u')
    # The value comes straight from the client and is handed to the
    # browser process. Only allow web URLs so that local files
    # (file://...) cannot be rendered and values starting with "-"
    # cannot be mistaken for browser command line switches.
    if not url.lower().startswith(('http://', 'https://')):
        abort(400, 'Parameter u must be an http(s) URL')

    append_headers = []
    screenshot_filename = get_screenshot_filename(url)
    if os.path.exists(screenshot_filename):
        append_headers.append(
            ('X-CacheFileResult', f'Cache file found:{screenshot_filename}'))
    else:
        try:
            ret = take_shapshot(url)
        except (subprocess.CalledProcessError, OSError) as exc:
            # A failed or missing browser must not turn into a 500:
            # fall through to the placeholder image below.
            append_headers.append(
                ('X-TakeResult', f'Failed:{type(exc).__name__}'))
        else:
            append_headers.append(('X-TakeResult', str(ret)))

    if os.path.exists(screenshot_filename):
        image_path = screenshot_filename
    else:
        # Could not create the file
        image_path = veil_file_dir
        append_headers.append(
            ('X-CacheFileResult', f'NotFound:{screenshot_filename}'))
    with open(image_path, 'rb') as fp:
        data = fp.read()

    r = HTTPResponse(status=200, body=data)
    r.set_header('Content-type', 'image/png')
    for header_name, header_value in append_headers:
        r.set_header(header_name, header_value)
    return r


# Start the Bottle application with the gevent server backend,
# listening on every interface on port 8080.
run(
    server='gevent',
    host='0.0.0.0',
    port=8080,
)
Currently unrated

Comments

Archive

2024
2023
2022
2021
2020
2019
2018
2017
2016
2015
2014
2013
2012
2011