mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2025-12-27 22:26:00 +00:00
37 lines
1.4 KiB
Python
37 lines
1.4 KiB
Python
import os
|
|
import sqlite3
|
|
|
|
from .fixtures import *
|
|
|
|
def test_title_is_extracted(tmp_path, process, disable_extractors_dict):
|
|
"""Test that title is extracted from the page."""
|
|
disable_extractors_dict.update({"SAVE_TITLE": "true"})
|
|
subprocess.run(['archivebox', 'add', 'https://example.com'],
|
|
capture_output=True, env=disable_extractors_dict)
|
|
|
|
os.chdir(tmp_path)
|
|
conn = sqlite3.connect("index.sqlite3")
|
|
conn.row_factory = sqlite3.Row
|
|
c = conn.cursor()
|
|
c.execute("SELECT title from core_snapshot")
|
|
snapshot = c.fetchone()
|
|
conn.close()
|
|
|
|
assert snapshot[0] is not None
|
|
assert "Example" in snapshot[0]
|
|
|
|
def test_title_is_htmlencoded_in_index_html(tmp_path, process, disable_extractors_dict):
|
|
"""
|
|
https://github.com/ArchiveBox/ArchiveBox/issues/330
|
|
Unencoded content should not be rendered as it facilitates xss injections
|
|
and breaks the layout.
|
|
"""
|
|
disable_extractors_dict.update({"SAVE_TITLE": "true"})
|
|
subprocess.run(['archivebox', 'add', 'https://example.com'],
|
|
capture_output=True, env=disable_extractors_dict)
|
|
list_process = subprocess.run(["archivebox", "list", "--html"], capture_output=True)
|
|
|
|
# Should not contain unescaped HTML tags in output
|
|
output = list_process.stdout.decode("utf-8")
|
|
assert "https://example.com" in output
|