Remove coordinates from output

Signed-off-by: Evan Lloyd New-Schmidt <evan@new-schmidt.com>
This commit is contained in:
Evan Lloyd New-Schmidt 2023-08-15 10:42:25 -04:00 committed by Evan Lloyd New-Schmidt
parent c4028e52fa
commit 6c02f4a569
2 changed files with 21 additions and 7 deletions

View file

@ -71,6 +71,8 @@ static ELEMENT_DENY_LIST: Lazy<Selector> = Lazy::new(|| {
"embed",
// Pronunciation "listen" link/button.
r#"span[typeof="mw:Transclusion"][data-mw*="\"audio\":"]"#,
// Coordinates transclusion.
"span#coordinates",
]
.join(", "),
)
@ -121,9 +123,7 @@ pub fn simplify_html(document: &mut Html, lang: &str) {
.descendants()
.filter_map(ElementRef::wrap)
{
if (ELEMENT_DENY_LIST.matches(&el) || is_empty_or_whitespace(&el))
&& !ELEMENT_ALLOW_LIST.matches(&el)
{
if ELEMENT_DENY_LIST.matches(&el) && !ELEMENT_ALLOW_LIST.matches(&el) {
to_remove.push(el.id());
}
}
@ -131,6 +131,8 @@ pub fn simplify_html(document: &mut Html, lang: &str) {
remove_empty_sections(document);
remove_empty(document);
remove_non_element_nodes(document);
expand_links(document);
@ -187,6 +189,22 @@ fn remove_toplevel_whitespace(document: &mut Html) {
remove_ids(document, to_remove.drain(..));
}
/// Removes every element under the document root for which
/// `is_empty_or_whitespace` returns `true`.
///
/// The ids of all matching elements are gathered in a first pass and handed
/// to `remove_ids` afterwards, so the tree is only read while it is being
/// walked and only modified once the walk has finished.
fn remove_empty(document: &mut Html) {
    let mut empty_ids: Vec<_> = document
        .root_element()
        .descendants()
        // Skip nodes that are not elements (`ElementRef::wrap` yields `None` for them).
        .filter_map(ElementRef::wrap)
        .filter(|el| is_empty_or_whitespace(el))
        .map(|el| el.id())
        .collect();

    remove_ids(document, empty_ids.drain(..));
}
fn remove_empty_sections(document: &mut Html) {
let mut to_remove = Vec::new();
for el in document.select(&HEADERS) {

View file

@ -1,7 +1,3 @@
<p>
<span id="coordinates">
Coordinates: <span title="Maps, aerial photos, and other data for this location">53°0611.4″N08°4629.2″W</span> / <span title="Maps, aerial photos, and other data for this location">53.103167°N 8.774778°W</span> / 53.103167; -8.774778</span>
</p>
<p>
<b>
Thoor Ballylee Castle</b> (Irish<i>Túr Bhaile Uí Laí</i>) is a fortified, 15th-century Anglo-Normantower house built by the septsde Burgo, or Burke, near the town of Gort in County Galway, Ireland. It is also known as <i>Yeats' Tower</i> because it was once owned and inhabited by the poet William Butler Yeats.