Add osm tag file parsing #23

Merged
newsch merged 6 commits from osm-tags into main 2023-08-10 13:37:59 +00:00
13 changed files with 1252 additions and 516 deletions

355
Cargo.lock generated
View file

@ -112,7 +112,7 @@ dependencies = [
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"miniz_oxide 0.6.2",
"object",
"rustc-demangle",
]
@ -123,6 +123,12 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "byteorder"
version = "1.4.3"
@ -160,7 +166,7 @@ checksum = "72394f3339a76daf211e57d4bcb374410f3965dcc606dd0e03738c7888766980"
dependencies = [
"anstream",
"anstyle",
"bitflags",
"bitflags 1.3.2",
"clap_lex",
"strsim",
]
@ -174,7 +180,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.17",
"syn 2.0.28",
]
[[package]]
@ -195,6 +201,58 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "crc32fast"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
dependencies = [
"cfg-if",
]
[[package]]
name = "cssparser"
version = "0.29.6"
@ -222,6 +280,27 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -256,6 +335,12 @@ version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "env_logger"
version = "0.10.0"
@ -290,6 +375,22 @@ dependencies = [
"libc",
]
[[package]]
name = "fastrand"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"
[[package]]
name = "flate2"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
dependencies = [
"crc32fast",
"miniz_oxide 0.7.1",
]
[[package]]
name = "form_urlencoded"
version = "1.1.0"
@ -355,6 +456,12 @@ version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "heck"
version = "0.4.1"
@ -397,6 +504,16 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "io-lifetimes"
version = "1.0.11"
@ -416,7 +533,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
dependencies = [
"hermit-abi",
"io-lifetimes",
"rustix",
"rustix 0.37.19",
"windows-sys 0.48.0",
]
@ -428,9 +545,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "libc"
version = "0.2.144"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "linux-raw-sys"
@ -438,6 +555,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
[[package]]
name = "linux-raw-sys"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
[[package]]
name = "lock_api"
version = "0.4.9"
@ -486,6 +609,24 @@ version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memmap2"
version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
dependencies = [
"libc",
]
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "miniz_oxide"
version = "0.6.2"
@ -495,6 +636,15 @@ dependencies = [
"adler",
]
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
@ -507,6 +657,16 @@ version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "object"
version = "0.30.3"
@ -522,13 +682,17 @@ version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"csv",
"ego-tree",
"env_logger",
"log",
"once_cell",
"osmpbf",
"rayon",
"scraper",
"serde",
"serde_json",
"thiserror",
"url",
"urlencoding",
]
@ -539,6 +703,20 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "osmpbf"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3bec2671f8eb1e9a353adfe8aafe44c9c5207e0012d469a4b61fb7bf33adf37"
dependencies = [
"byteorder",
"flate2",
"memmap2",
"protobuf",
"protobuf-codegen",
"rayon",
]
[[package]]
name = "parking_lot"
version = "0.12.1"
@ -557,7 +735,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"redox_syscall 0.2.16",
"smallvec",
"windows-sys 0.45.0",
]
@ -680,18 +858,69 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
[[package]]
name = "proc-macro2"
version = "1.0.59"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.28"
name = "protobuf"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
checksum = "b55bad9126f378a853655831eb7363b7b01b81d19f8cb1218861086ca4a1a61e"
dependencies = [
"once_cell",
"protobuf-support",
"thiserror",
]
[[package]]
name = "protobuf-codegen"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dd418ac3c91caa4032d37cb80ff0d44e2ebe637b2fb243b6234bf89cdac4901"
dependencies = [
"anyhow",
"once_cell",
"protobuf",
"protobuf-parse",
"regex",
"tempfile",
"thiserror",
]
[[package]]
name = "protobuf-parse"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d39b14605eaa1f6a340aec7f320b34064feb26c93aec35d6a9a2272a8ddfa49"
dependencies = [
"anyhow",
"indexmap",
"log",
"protobuf",
"protobuf-support",
"tempfile",
"thiserror",
"which",
]
[[package]]
name = "protobuf-support"
version = "3.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5d4d7b8601c814cfb36bcebb79f0e61e45e1e93640cf778837833bbed05c372"
dependencies = [
"thiserror",
]
[[package]]
name = "quote"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
dependencies = [
"proc-macro2",
]
@ -777,13 +1006,44 @@ dependencies = [
"rand_core 0.5.1",
]
[[package]]
name = "rayon"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
@ -824,11 +1084,24 @@ version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
dependencies = [
"bitflags",
"bitflags 1.3.2",
"errno",
"io-lifetimes",
"libc",
"linux-raw-sys",
"linux-raw-sys 0.3.8",
"windows-sys 0.48.0",
]
[[package]]
name = "rustix"
version = "0.38.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "172891ebdceb05aa0005f533a6cbfca599ddd7d966f6f5d4d9b2e70478e70399"
dependencies = [
"bitflags 2.3.3",
"errno",
"libc",
"linux-raw-sys 0.4.5",
"windows-sys 0.48.0",
]
@ -867,7 +1140,7 @@ version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c37578180969d00692904465fb7f6b3d50b9a2b952b87c23d0e2e5cb5013416"
dependencies = [
"bitflags",
"bitflags 1.3.2",
"cssparser",
"derive_more",
"fxhash",
@ -902,7 +1175,7 @@ checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.17",
"syn 2.0.28",
]
[[package]]
@ -989,15 +1262,28 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.17"
version = "2.0.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45b6ddbb36c5b969c182aec3c4a0bce7df3fbad4b77114706a49aacc80567388"
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tempfile"
version = "3.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc02fddf48964c42031a0b3fe0428320ecf3a73c401040fc0096f97794310651"
dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.3.5",
"rustix 0.38.7",
"windows-sys 0.48.0",
]
[[package]]
name = "tendril"
version = "0.4.3"
@ -1018,6 +1304,26 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.28",
]
[[package]]
name = "tinyvec"
version = "1.6.0"
@ -1107,6 +1413,17 @@ version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "which"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
"once_cell",
]
[[package]]
name = "winapi"
version = "0.3.9"

View file

@ -10,13 +10,17 @@ default-run = "om-wikiparser"
[dependencies]
anyhow = { version = "1.0.71", features = ["backtrace"] }
clap = { version = "4.3.2", features = ["derive"] }
csv = "1.2.2"
ego-tree = "0.6.2"
env_logger = "0.10.0"
log = "0.4.18"
once_cell = "1.18.0"
osmpbf = "0.3.1"
rayon = "1.7.0"
scraper = "0.16.0"
serde = { version = "1.0.163", features = ["derive"] }
serde_json = "1.0.96"
thiserror = "1.0.44"
url = "2.3.1"
urlencoding = "2.1.2"

View file

@ -13,7 +13,7 @@ It defines article sections that are not important for users and should be remov
## Usage
To use with the map generator, see the [`run.sh` script](run.sh) and its own help documentation.
It handles preparing the inputs, using multiple dumps, and re-running to convert titles to QIDs and extract them across languages.
It handles extracting the tags, using multiple dumps, and re-running to convert titles to QIDs and extract them across languages.
To run the wikiparser manually or for development, see below.
@ -29,41 +29,64 @@ Run the program with the `--help` flag to see all supported arguments.
```
$ cargo run --release -- --help
Extract article HTML from Wikipedia Enterprise HTML dumps.
Extract articles from Wikipedia Enterprise HTML dumps
Expects an uncompressed dump connected to stdin.
Usage: om-wikiparser <COMMAND>
Usage: om-wikiparser [OPTIONS] <OUTPUT_DIR>
Commands:
get-articles Extract, filter, and simplify article HTML from Wikipedia Enterprise HTML dumps
get-tags Extract wikidata/wikipedia tags from an OpenStreetMap PBF dump
simplify Apply the same html article simplification used when extracting articles to stdin, and write it to stdout
help Print this message or the help of the given subcommand(s)
Options:
-h, --help Print help (see more with '--help')
-V, --version Print version
```
Each command has its own additional help:
```
$ cargo run -- get-articles --help
Extract, filter, and simplify article HTML from Wikipedia Enterprise HTML dumps.
Expects an uncompressed dump (newline-delimited JSON) connected to stdin.
Usage: om-wikiparser get-articles [OPTIONS] <OUTPUT_DIR>
Arguments:
<OUTPUT_DIR>
Directory to write the extracted articles to
Options:
--write-new-ids <WRITE_NEW_IDS>
--write-new-qids <FILE>
Append to the provided file path the QIDs of articles matched by title but not QID.
Use this to save the QIDs of articles you know the url of, but not the QID. The same path can later be passed to the `--wikidata-ids` option to extract them from another language's dump.
Use this to save the QIDs of articles you know the url of, but not the QID. The same path can later be passed to the `--wikidata-qids` option to extract them from another language's dump. Writes are atomically appended to the file, so the same path may be used by multiple concurrent instances.
-h, --help
Print help (see a summary with '-h')
-V, --version
Print version
FILTERS:
--wikidata-ids <WIKIDATA_IDS>
--osm-tags <FILE.tsv>
Path to a TSV file that contains one or more of `wikidata`, `wikipedia` columns.
This can be generated with the `get-tags` command or `osmconvert --csv-headline --csv 'wikidata wikipedia'`.
--wikidata-qids <FILE>
Path to file that contains a Wikidata QID to extract on each line (e.g. `Q12345`)
--wikipedia-urls <WIKIPEDIA_URLS>
--wikipedia-urls <FILE>
Path to file that contains a Wikipedia article url to extract on each line (e.g. `https://lang.wikipedia.org/wiki/Article_Title`)
```
It takes as inputs:
- A Wikipedia Enterprise HTML dump (newline-delimited JSON), extracted and connected to `stdin`.
- A file of Wikidata QIDs to extract, one per line (e.g. `Q12345`), passed as the CLI flag `--wikidata-ids`.
- A file of Wikipedia article titles to extract, one per line (e.g. `https://$LANG.wikipedia.org/wiki/$ARTICLE_TITLE`), passed as a CLI flag `--wikipedia-urls`.
- A directory to write the extracted articles to, as a CLI argument.
- Any number of filters passed:
- A TSV file of wikidata qids and wikipedia urls, created by the `get-tags` command or `osmconvert`, passed as the CLI flag `--osm-tags`.
- A file of Wikidata QIDs to extract, one per line (e.g. `Q12345`), passed as the CLI flag `--wikidata-qids`.
- A file of Wikipedia article URLs to extract, one per line (e.g. `https://$LANG.wikipedia.org/wiki/$ARTICLE_TITLE`), passed as the CLI flag `--wikipedia-urls`.
As an example of manual usage with the map generator:
- Assuming this program is installed to `$PATH` as `om-wikiparser`.
@ -74,7 +97,7 @@ As an example of manual usage with the map generator:
```shell
# Transform intermediate files from generator.
cut -f 2 id_to_wikidata.csv > wikidata_ids.txt
cut -f 2 id_to_wikidata.csv > wikidata_qids.txt
tail -n +2 wiki_urls.txt | cut -f 3 > wikipedia_urls.txt
# Enable backtraces in errors and panics.
export RUST_BACKTRACE=1
@ -83,9 +106,38 @@ export RUST_LOG=om_wikiparser=debug
# Begin extraction.
for dump in $DUMP_DOWNLOAD_DIR/*-ENTERPRISE-HTML.json.tar.gz
do
tar xzf $dump | om-wikiparser \
--wikidata-ids wikidata_ids.txt \
tar xzOf $dump | om-wikiparser get-articles \
--wikidata-qids wikidata_qids.txt \
--wikipedia-urls wikipedia_urls.txt \
--write-new-qids new_qids.txt \
descriptions/
done
# Extract discovered QIDs.
for dump in $DUMP_DOWNLOAD_DIR/*-ENTERPRISE-HTML.json.tar.gz
do
tar xzOf $dump | om-wikiparser get-articles \
--wikidata-qids new_qids.txt \
descriptions/
done
```
Alternatively, extract the tags directly from a `.osm.pbf` file (referenced here as `planet-latest.osm.pbf`):
```shell
# Extract tags
om-wikiparser get-tags planet-latest.osm.pbf > osm_tags.tsv
# Begin extraction.
for dump in $DUMP_DOWNLOAD_DIR/*-ENTERPRISE-HTML.json.tar.gz
do
tar xzOf $dump | om-wikiparser get-articles \
--osm-tags osm_tags.tsv \
--write-new-qids new_qids.txt \
descriptions/
done
# Extract discovered QIDs.
for dump in $DUMP_DOWNLOAD_DIR/*-ENTERPRISE-HTML.json.tar.gz
do
tar xzOf $dump | om-wikiparser get-articles \
--wikidata-qids new_qids.txt \
descriptions/
done
```

View file

@ -4,22 +4,21 @@ use std::{collections::HashSet, str::FromStr};
extern crate om_wikiparser;
extern crate test;
biodranik commented 2023-08-09 18:50:27 +00:00 (Migrated from github.com)
Review

Why is it not in one line?

Why is it not in one line?
biodranik commented 2023-08-09 18:50:50 +00:00 (Migrated from github.com)
Review

A constant to avoid copy-paste?

A constant to avoid copy-paste?
newsch commented 2023-08-09 19:16:18 +00:00 (Migrated from github.com)
Review

I'm not sure, renaming it to the shorter Title must have altered rustfmt's heuristics.

I'm not sure, renaming it to the shorter `Title` must have altered `rustfmt`'s heuristics.
use om_wikiparser::wm::{Qid, Title};
const TITLE: &str = "https://en.wikipedia.org/wiki/Article_Title";
const QID: &str = "Q123456789";
#[bench]
fn parse_wikipedia(b: &mut test::Bencher) {
b.iter(|| {
let title = om_wikiparser::wm::WikipediaTitleNorm::from_url(
"https://en.wikipedia.org/wiki/Article_Title",
)
.unwrap();
Title::from_url(TITLE).unwrap();
});
}
#[bench]
fn hash_wikipedia(b: &mut test::Bencher) {
let title = om_wikiparser::wm::WikipediaTitleNorm::from_url(
"https://en.wikipedia.org/wiki/Article_Title",
)
.unwrap();
let title = Title::from_url(TITLE).unwrap();
let mut set = HashSet::new();
b.iter(|| {
set.insert(&title);
@ -29,13 +28,13 @@ fn hash_wikipedia(b: &mut test::Bencher) {
#[bench]
fn parse_wikidata(b: &mut test::Bencher) {
b.iter(|| {
let qid = om_wikiparser::wm::WikidataQid::from_str("Q123456789").unwrap();
Qid::from_str(QID).unwrap();
});
}
#[bench]
fn hash_wikidata(b: &mut test::Bencher) {
let qid = om_wikiparser::wm::WikidataQid::from_str("Q123456789").unwrap();
let qid = Qid::from_str(QID).unwrap();
let mut set = HashSet::new();
b.iter(|| {
set.insert(&qid);

41
run.sh
View file

@ -1,17 +1,16 @@
#! /usr/bin/env bash
# shellcheck disable=SC2016 # Backticks not used as expansions in documentation.
USAGE='Usage: ./run.sh [-h] <BUILD_DIR> <DUMP_FILE.json.tar.gz> [<DUMP_FILE.json.tar.gz>...]
USAGE='Usage: ./run.sh [-h] <BUILD_DIR> <OSM_FILE.osm.pbf> <DUMP_FILE.json.tar.gz> [<DUMP_FILE.json.tar.gz>...]
A convenience script to run the wikiparser with the maps generator as a drop-in replacement for the descriptions scraper.
Arguments:
<BUILD_DIR> An existing directory to place descriptions in.
The `id_to_wikidata.csv` and `wiki_urls.txt` files output by the
maps generator must be placed in this directory before running.
The extracted articles will be placed in a `descriptions`
subdirectory within this directory.
The `intermediate_data` subfolder of a maps build directory may
be used for this. The same folder may be used for multiple runs.
<OSM_FILE> An OpenStreetMap dump in PBF format to extract tags from.
<DUMP_FILE> A wikipedia enterprise html dump. These take the form of
`enwiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz`. Multiple
dumps in the same language SHOULD NOT be provided, and will
@ -21,7 +20,7 @@ Options:
-h Print this help screen
1. Builds wikiparser.
2. Extracts wikidata ids and wikipedia urls from generator intermediate files `id_to_wikidata.csv` and `wiki_urls.txt`.
2. Extracts wikidata qids and wikipedia urls from OpenStreetMap pbf file (NOTE: this spawns as many threads as there are cores).
3. Runs wikiparser in parallel for all input dump files (NOTE: this currently starts 2 processes for each dump files).
For information on running the wikiparser manually, see README.md.
@ -43,8 +42,8 @@ do
done
shift $((OPTIND - 1))
if [ -z "${2-}" ]; then
echo "BUILD_DIR and at least one DUMP_FILE are required" >&2
if [ -z "${3-}" ]; then
echo "BUILD_DIR, OSM_FILE, and at least one DUMP_FILE are required" >&2
echo -n "$USAGE" >&2
exit 1
fi
@ -58,6 +57,13 @@ if [ ! -d "$BUILD_DIR" ]; then
exit 1
fi
OSM_FILE=$(readlink -f -- "$1")
shift
if [ ! -f "$OSM_FILE" ]; then
echo "OSM_FILE '$OSM_FILE' does not exist or is not a file" >&2
exit 1
fi
DUMP_FILES=()
while (( $# > 0 )); do
dump_file="$(readlink -f -- "$1")"
@ -91,16 +97,8 @@ wikiparser=$(pwd)/target/release/om-wikiparser
log "Changing to maps build dir '$BUILD_DIR'"
cd "$BUILD_DIR"
log "Transforming intermediate generator data"
for intermediate_file in id_to_wikidata.csv wiki_urls.txt; do
if [ ! -e "$intermediate_file" ]; then
echo -e "Cannot find intermediate generator file '$intermediate_file' in maps build dir '$BUILD_DIR/'\nWas the descriptions step run?" >&2
exit 1
fi
done
cut -f 2 id_to_wikidata.csv > wikidata_ids.txt
tail -n +2 wiki_urls.txt | cut -f 3 > wikipedia_urls.txt
log "Extracting tags from '$OSM_FILE'"
"$wikiparser" get-tags "$OSM_FILE" > osm_tags.tsv
# Enable backtraces in errors and panics.
export RUST_BACKTRACE=1
@ -129,10 +127,9 @@ trap 'kill_jobs' SIGINT SIGTERM EXIT
for dump in "${DUMP_FILES[@]}"; do
log "Extracting '$dump'"
tar xzOf "$dump" | "$wikiparser" \
--wikidata-ids wikidata_ids.txt \
--wikipedia-urls wikipedia_urls.txt \
--write-new-ids new_qids.txt \
tar xzOf "$dump" | "$wikiparser" get-articles \
--osm-tags osm_tags.tsv \
--write-new-qids new_qids.txt \
"$OUTPUT_DIR" &
done
@ -142,8 +139,8 @@ log "Beginning extraction of discovered QIDs"
# Extract new qids from other dumps in parallel.
for dump in "${DUMP_FILES[@]}"; do
tar xzOf "$dump" | "$wikiparser" \
--wikidata-ids new_qids.txt \
tar xzOf "$dump" | "$wikiparser" get-articles \
--wikidata-qids new_qids.txt \
"$OUTPUT_DIR" &
done

View file

@ -1,23 +0,0 @@
//! Apply html article simplification to stdin, and write it to stdout.
//!
//! Usage:
//! simplify_html < article.html > simplified.html
use std::io::{stdin, stdout, Read, Write};
use om_wikiparser::html::simplify;
/// Read all of stdin as a string, run it through `simplify`, and write the result to stdout.
///
/// `"en"` is passed as the second argument to `simplify` (presumably the article
/// language; confirm against `om_wikiparser::html::simplify`).
///
/// Returns an error if the logger cannot be initialized, if stdin cannot be read
/// into a `String`, or if writing to stdout fails.
fn main() -> anyhow::Result<()> {
    // Start from `Info` level, then apply env_logger's default environment parsing.
    env_logger::Builder::new()
        .filter_level(log::LevelFilter::Info)
        .parse_default_env()
        .try_init()?;

    // The whole document is buffered before simplification.
    let html = {
        let mut buffer = String::new();
        stdin().read_to_string(&mut buffer)?;
        buffer
    };

    let simplified = simplify(&html, "en");
    stdout().write_all(simplified.as_bytes())?;

    Ok(())
}

288
src/get_articles.rs Normal file
View file

@ -0,0 +1,288 @@
use std::{
fs::{self, File},
io::{stdin, BufRead, Write},
os::unix,
path::{Path, PathBuf},
};
use anyhow::{anyhow, bail, Context};
use om_wikiparser::{
html::simplify,
wm::{parse_osm_tag_file, parse_wikidata_file, parse_wikipedia_file, Page, Title},
};
/// Extract, filter, and simplify article HTML from Wikipedia Enterprise HTML dumps.
///
/// Expects an uncompressed dump (newline-delimited JSON) connected to stdin.
// NOTE: The `///` comments on this struct and its fields are rendered by clap as the
// command's `--help` text, so edits to them change user-visible output.
#[derive(clap::Args)]
pub struct Args {
    /// Directory to write the extracted articles to.
    pub output_dir: PathBuf,

    /// Path to a TSV file that contains one or more of `wikidata`, `wikipedia` columns.
    ///
    /// This can be generated with the `get-tags` command or `osmconvert --csv-headline --csv 'wikidata wikipedia'`.
    #[arg(long, help_heading = "FILTERS", value_name = "FILE.tsv")]
    pub osm_tags: Option<PathBuf>,

    /// Path to file that contains a Wikidata QID to extract on each line
    /// (e.g. `Q12345`).
    #[arg(long, help_heading = "FILTERS", value_name = "FILE")]
    pub wikidata_qids: Option<PathBuf>,

    /// Path to file that contains a Wikipedia article url to extract on each line
    /// (e.g. `https://lang.wikipedia.org/wiki/Article_Title`).
    #[arg(long, help_heading = "FILTERS", value_name = "FILE")]
    pub wikipedia_urls: Option<PathBuf>,

    /// Append to the provided file path the QIDs of articles matched by title but not QID.
    ///
    /// Use this to save the QIDs of articles you know the url of, but not the QID.
    /// The same path can later be passed to the `--wikidata-qids` option to extract them from another language's dump.
    /// Writes are atomically appended to the file, so the same path may be used by multiple concurrent instances.
    #[arg(long, value_name = "FILE")]
    pub write_new_qids: Option<PathBuf>,
}
/// Load the configured filters, then read pages from stdin one line at a time and
/// write every page that matches a filter under `args.output_dir`.
///
/// # Errors
///
/// Returns an error if any filter file fails to load, if the `write_new_qids` file
/// cannot be opened, if `args.output_dir` is not a directory, if a line of stdin
/// cannot be read or deserialized into a `Page`, or if appending a new QID fails.
/// Failures while writing an individual article are logged and do not abort the run.
pub fn run(args: Args) -> anyhow::Result<()> {
// Each explicit filter file is optional; when a flag is absent the filter falls back to its `Default` value.
let mut wikipedia_titles = if let Some(path) = args.wikipedia_urls {
info!("Loading article urls from {path:?}");
parse_wikipedia_file(path)?
} else {
Default::default()
};
let mut wikidata_qids = if let Some(path) = args.wikidata_qids {
info!("Loading wikidata QIDs from {path:?}");
parse_wikidata_file(path)?
} else {
Default::default()
};
// `parse_osm_tag_file` receives both filters mutably (presumably to add the tag-file entries to them).
if let Some(ref path) = args.osm_tags {
info!("Loading wikipedia/wikidata osm tags from {path:?}");
let original_items = wikidata_qids.len() + wikipedia_titles.len();
let mut line_errors = Vec::new();
parse_osm_tag_file(
path,
&mut wikidata_qids,
&mut wikipedia_titles,
Some(&mut line_errors),
)?;
if !line_errors.is_empty() {
let error_count = line_errors.len();
// Only the growth of the two filters since `original_items` is counted, so the rate is relative to this file.
// If nothing was added this divides by zero and yields infinity, which still compares `>=` the threshold.
let new_items = wikidata_qids.len() + wikipedia_titles.len() - original_items;
// NOTE: `percentage` is already multiplied by 100, so this threshold means 0.02 percent, not 2 percent.
let expected_threshold = 0.02;
let percentage = 100.0 * error_count as f64 / new_items as f64;
// The threshold only selects the log level of the summary message.
let level = if percentage >= expected_threshold {
log::Level::Error
} else {
log::Level::Info
};
log!(
level,
"{error_count} errors ({percentage:.4}%) parsing osm tags from {path:?}",
);
}
}
debug!("Parsed {} unique article titles", wikipedia_titles.len());
debug!("Parsed {} unique wikidata QIDs", wikidata_qids.len());
// NOTE: For atomic writes to the same file across threads/processes:
// - The file needs to be opened in APPEND mode (`.append(true)`).
// - Each write needs to be a single syscall (for Rust, use `format!` for formatting before calling `write!`, or `write!` to a `String` first).
// - Each write needs to be under `PIPE_BUF` size (see `man write(3)`), usually 4kb on Linux.
//
// For more information, see:
// - `man write(3posix)`: https://www.man7.org/linux/man-pages/man3/write.3p.html
// - `std::fs::OpenOptions::append`: https://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.append
// - https://stackoverflow.com/questions/1154446/is-file-append-atomic-in-unix
let mut write_new_qids = args
.write_new_qids
.as_ref()
.map(|p| File::options().create(true).append(true).open(p))
.transpose()?;
// Fail before reading any input if the output directory is missing.
if !args.output_dir.is_dir() {
bail!("output dir {:?} does not exist", args.output_dir)
}
info!("Processing dump");
let dump = stdin().lock();
// TODO: Compare different deserialization methods.
// The docs warn against using a reader directly, and it's slower than tar can decompress the dump.
// let stream = serde_json::Deserializer::from_reader(dump).into_iter::<Page>();
let stream = dump.lines().map(|r| {
r.map_err(anyhow::Error::new)
.and_then(|s| serde_json::from_str::<Page>(&s).map_err(anyhow::Error::new))
});
for page in stream {
// A line that cannot be read or deserialized aborts the whole run.
let page = page?;
let qid = page.wikidata();
// A page without a QID never counts as a wikidata match (`unwrap_or_default` yields `false`).
let is_wikidata_match = qid
.as_ref()
.map(|qid| wikidata_qids.contains(qid))
.unwrap_or_default();
let matching_titles = if wikipedia_titles.is_empty() {
Default::default()
biodranik commented 2023-08-09 21:33:40 +00:00 (Migrated from github.com)
Review

What is the benefit of hiding errors under a threshold? Isn't it beneficial to see all errors and be able to estimate/compare the quality of the dump, and to easily grep/find what is most important, or feed the whole log to contributors for fixes?

What is the benefit of hiding errors under a threshold? Isn't it beneficial to see all errors and be able to estimate/compare the quality of the dump, and to easily grep/find what is most important, or feed the whole log to contributors for fixes?
newsch commented 2023-08-09 22:40:00 +00:00 (Migrated from github.com)
Review

The threshold only determines if the message is info vs error level.
When you use the run.sh script with multiple languages it prints a copy of the hundreds of errors for each language.
I think writing the parse errors to a file separately will be easier to read and deal with.

I'm open to other ideas.

The threshold only determines if the message is `info` vs `error` level. When you use the `run.sh` script with multiple languages it prints a copy of the hundreds of errors for each language. I think writing the parse errors to a file separately will be easier to read and deal with. I'm open to other ideas.
} else {
page.all_titles()
.filter_map(|r| {
r.map(Some).unwrap_or_else(|e| {
warn!("Could not parse title for {:?}: {:#}", &page.name, e);
None
})
})
.filter(|t| wikipedia_titles.contains(t))
.collect::<Vec<_>>()
};
// Skip pages that match neither the QID filter nor any title filter.
if !is_wikidata_match && matching_titles.is_empty() {
continue;
}
// Write matched new QIDs back to file.
if let (Some(f), Some(qid)) = (&mut write_new_qids, &qid) {
// Only pages found by title alone contribute a QID that was not already in the filter.
if !is_wikidata_match && !matching_titles.is_empty() {
debug!("Writing new id {} for article {:?}", qid, page.name);
// NOTE: Write to string buffer first to have a single atomic write syscall.
// See `write_new_qids` for more info.
let line = format!("{}\n", qid);
write!(f, "{}", line).with_context(|| {
format!(
"writing new QID to file {:?}",
args.write_new_qids.as_ref().unwrap()
)
})?;
}
}
// A failure to write one article is logged and does not stop processing of the dump.
if let Err(e) = write(&args.output_dir, &page, matching_titles) {
error!("Error writing article {:?}: {:#}", page.name, e);
}
}
Ok(())
}
/// Determine the directory to write the article contents to, create it, and create any necessary symlinks to it.
fn create_article_dir(
base: impl AsRef<Path>,
page: &Page,
redirects: impl IntoIterator<Item = Title>,
) -> anyhow::Result<PathBuf> {
let base = base.as_ref();
let mut redirects = redirects.into_iter();
let main_dir = match page.wikidata() {
None => {
// Write to wikipedia title directory.
// Prefer first redirect, fall back to page title if none exist
info!("Page without wikidata qid: {:?} ({})", page.name, page.url);
redirects
.next()
.or_else(|| match page.title() {
Ok(title) => Some(title),
Err(e) => {
warn!("Unable to parse title for page {:?}: {:#}", page.name, e);
None
}
})
// hard fail when no titles can be parsed
.ok_or_else(|| anyhow!("No available titles for page {:?}", page.name))?
.get_dir(base.to_owned())
}
Some(qid) => {
// Otherwise use wikidata as main directory and symlink from wikipedia titles.
qid.get_dir(base.to_owned())
}
};
if main_dir.is_symlink() {
fs::remove_file(&main_dir)
.with_context(|| format!("removing old link for main directory {:?}", &main_dir))?;
}
fs::create_dir_all(&main_dir)
.with_context(|| format!("creating main directory {:?}", &main_dir))?;
// Write symlinks to main directory.
for title in redirects {
let wikipedia_dir = title.get_dir(base.to_owned());
// Build required directory.
//
// Possible states from previous run:
// - Does not exist (and is not a symlink)
// - Exists, is a directory
// - Exists, is a valid symlink to correct location
// - Exists, is a valid symlink to incorrect location
if wikipedia_dir.exists() {
if wikipedia_dir.is_symlink() {
// Only replace if not valid
if fs::read_link(&wikipedia_dir)? == main_dir {
continue;
}
fs::remove_file(&wikipedia_dir)?;
} else {
fs::remove_dir_all(&wikipedia_dir)?;
}
} else {
// titles can contain `/`, so ensure necessary subdirs exist
let parent_dir = wikipedia_dir.parent().unwrap();
fs::create_dir_all(parent_dir)
.with_context(|| format!("creating wikipedia directory {:?}", parent_dir))?;
}
unix::fs::symlink(&main_dir, &wikipedia_dir).with_context(|| {
format!(
"creating symlink from {:?} to {:?}",
wikipedia_dir, main_dir
)
})?;
}
Ok(main_dir)
}
/// Write selected article to disk.
///
/// - Write page contents to wikidata page (`wikidata.org/wiki/QXXX/lang.html`).
/// - If the page has no wikidata qid, write contents to wikipedia location (`lang.wikipedia.org/wiki/article_title/lang.html`).
/// - Create links from all wikipedia urls and redirects (`lang.wikipedia.org/wiki/a_redirect -> wikidata.org/wiki/QXXX`).
fn write(
base: impl AsRef<Path>,
page: &Page,
redirects: impl IntoIterator<Item = Title>,
) -> anyhow::Result<()> {
let article_dir = create_article_dir(base, page, redirects)?;
// Write html to determined file.
let mut filename = article_dir;
filename.push(&page.in_language.identifier);
filename.set_extension("html");
debug!("{:?}: {:?}", page.name, filename);
if filename.exists() {
debug!("Overwriting existing file");
}
let html = simplify(&page.article_body.html, &page.in_language.identifier);
let mut file =
File::create(&filename).with_context(|| format!("creating html file {:?}", filename))?;
file.write_all(html.as_bytes())
.with_context(|| format!("writing html file {:?}", filename))?;
Ok(())
}

93
src/get_tags.rs Normal file
View file

@ -0,0 +1,93 @@
use std::{
io::{stdout, Read},
sync::mpsc,
thread,
};
use osmpbf::{BlobDecode, BlobReader, Element};
use rayon::prelude::*;
/// One output row: an OSM element id together with its `wikidata` and `wikipedia` tag values.
struct Record {
/// The element id rendered as a decimal string.
id: String,
/// Trimmed value of the `wikidata` tag, empty when the element has none.
wikidata: String,
/// Trimmed value of the `wikipedia` tag, empty when the element has none.
wikipedia: String,
}
/// Extract matching tags from an osm pbf file and write to stdout in TSV.
///
/// Blobs are decoded in parallel through rayon's `par_bridge`. Every extracted
/// record is handed over a bounded channel to a dedicated `writer` thread, which
/// is the only place that touches stdout.
///
/// # Errors
///
/// Returns the writer's error if writing failed, otherwise the first error hit
/// while reading or decoding the pbf data.
pub fn run(pbf: impl Read + Send) -> anyhow::Result<()> {
    let reader = BlobReader::new(pbf);

    let (send, recv) = mpsc::sync_channel(128);
    let writer_thread = thread::Builder::new()
        .name("writer".to_string())
        .spawn(move || write(recv))?;

    // `send` is moved into the closure, so it is dropped as soon as `try_for_each`
    // returns. That closes the channel and lets the writer thread finish.
    let read_result = reader
        .par_bridge()
        .try_for_each(move |blob| -> anyhow::Result<()> {
            // Based on `osmpbf` implementation of `ElementReader`.
            let BlobDecode::OsmData(block) = blob?.decode()? else { return Ok(()) };
            for record in block.elements().filter_map(extract_tags) {
                send.send(record)?;
            }
            Ok(())
        });

    // Always join the writer, even when reading failed, so its result is not lost.
    // A panic in the writer is propagated unchanged.
    let write_result = writer_thread
        .join()
        .unwrap_or_else(|payload| std::panic::resume_unwind(payload));

    // Check the writer first: when it stops early, the reader only sees a generic
    // "channel closed" send error, while the writer holds the actual cause.
    let record_count = write_result?;
    read_result?;

    info!("Finished processing {record_count} records");
    Ok(())
}
/// Write a header row followed by one TSV row per received [`Record`] to stdout.
///
/// Runs until the sending side of `recv` is closed and returns the number of
/// records written (the header row is not counted).
///
/// # Errors
///
/// Fails if a row cannot be written or the final flush fails.
fn write(recv: mpsc::Receiver<Record>) -> anyhow::Result<usize> {
    let mut output = csv::WriterBuilder::new()
        .delimiter(b'\t')
        .from_writer(stdout().lock());

    output.write_record(["@id", "wikidata", "wikipedia"])?;

    let mut count = 0;
    for Record {
        id,
        wikidata,
        wikipedia,
    } in recv
    {
        output.write_record([id, wikidata, wikipedia])?;
        count += 1;
    }

    // The csv writer buffers rows; flushing on drop would swallow any error,
    // so flush explicitly to report a failed final write.
    output.flush()?;

    Ok(count)
}
/// Turn any kind of OSM element into a [`Record`], or `None` when
/// `make_record` finds no matching tag on it.
fn extract_tags(el: Element) -> Option<Record> {
    // Each element kind has its own id/tag accessors, so dispatch per variant.
    match el {
        Element::Node(node) => make_record(node.id(), node.tags()),
        Element::DenseNode(dense) => make_record(dense.id(), dense.tags()),
        Element::Way(way) => make_record(way.id(), way.tags()),
        Element::Relation(relation) => make_record(relation.id(), relation.tags()),
    }
}
/// Build a [`Record`] for element `id` from its `(key, value)` tag pairs.
///
/// Only the `wikidata` and `wikipedia` keys are looked at; their values are
/// trimmed, and when a key occurs more than once the last occurrence wins.
/// Returns `None` when both values end up empty.
fn make_record<'i>(id: i64, tags: impl 'i + Iterator<Item = (&'i str, &'i str)>) -> Option<Record> {
    let (mut wikidata, mut wikipedia) = (String::new(), String::new());

    for (key, value) in tags {
        // Pick the field this tag belongs to; ignore every other key.
        let slot = match key {
            "wikidata" => &mut wikidata,
            "wikipedia" => &mut wikipedia,
            _ => continue,
        };
        *slot = value.trim().to_owned();
    }

    let has_tag = !wikidata.is_empty() || !wikipedia.is_empty();
    has_tag.then(|| Record {
        id: id.to_string(),
        wikidata,
        wikipedia,
    })
}

View file

@ -1,19 +1,102 @@
use std::{
fs::{self, File},
io::{stdin, BufRead, Write},
os::unix,
path::{Path, PathBuf},
fs::File,
io::{stdin, stdout, BufReader, Read, Write},
num::NonZeroUsize,
path::PathBuf,
};
use anyhow::{anyhow, bail, Context};
use clap::{CommandFactory, Parser};
use clap::{CommandFactory, Parser, Subcommand};
#[macro_use]
extern crate log;
use om_wikiparser::{
html::simplify,
wm::{parse_wikidata_file, parse_wikipedia_file, Page, WikipediaTitleNorm},
};
mod get_articles;
mod get_tags;
// Top-level command line definition.
// NOTE: clap's derive uses the `///` comment below as the program description in `--help`,
// so it is part of the program's output rather than plain documentation.
/// Extract articles from Wikipedia Enterprise HTML dumps.
#[derive(Parser)]
// NOTE(review): `version` appears twice in this attribute; presumably the explicit
// `version = crate::version()` is the one that takes effect — confirm.
#[command(author, version, about, long_about, version = crate::version())]
struct Args {
// The subcommand to run; dispatched in `main`.
#[command(subcommand)]
cmd: Cmd,
}
// Available subcommands.
// NOTE: the `///` comments on variants and fields are rendered by clap as help text.
#[derive(Subcommand)]
enum Cmd {
// Arguments for this subcommand are declared in the `get_articles` module.
GetArticles(get_articles::Args),
/// Extract wikidata/wikipedia tags from an OpenStreetMap PBF dump.
///
/// Writes to stdout the extracted tags in a TSV format similar to `osmconvert --csv`.
GetTags {
/// The `.osm.pbf` file to use.
pbf_file: PathBuf,
/// The number of threads to spawn to parse and decompress the pbf file.
///
/// Defaults to the number of cores.
// When omitted, `main` passes 0 to rayon's `num_threads`, leaving the choice to rayon.
#[arg(short, long)]
procs: Option<NonZeroUsize>,
},
/// Apply the same html article simplification used when extracting articles to stdin, and write it to stdout.
///
/// This is meant for testing and debugging.
Simplify {
/// The language to use when processing the article (defaults to `en`).
#[arg(long, default_value_t = String::from("en"))]
lang: String,
},
}
fn main() -> anyhow::Result<()> {
// Use info level by default, load overrides from `RUST_LOG` env variable.
// See https://docs.rs/env_logger/latest/env_logger/index.html#example
env_logger::Builder::new()
.filter_level(log::LevelFilter::Info)
.parse_default_env()
.try_init()?;
let args = Args::parse();
info!("{} {}", Args::command().get_name(), version());
match args.cmd {
Cmd::GetArticles(args) => {
if args.wikidata_qids.is_none()
&& args.wikipedia_urls.is_none()
&& args.osm_tags.is_none()
{
let mut cmd = Args::command();
cmd.error(
clap::error::ErrorKind::MissingRequiredArgument,
"at least one of --osm-tags --wikidata-qids --wikipedia-urls is required",
)
.exit()
}
get_articles::run(args)
}
Cmd::GetTags { pbf_file, procs } => {
rayon::ThreadPoolBuilder::new()
.thread_name(|num| format!("worker{num}"))
.num_threads(procs.map(usize::from).unwrap_or_default())
.build_global()?;
let pbf_file = File::open(pbf_file).map(BufReader::new)?;
get_tags::run(pbf_file)
}
Cmd::Simplify { lang } => {
let mut input = String::new();
stdin().read_to_string(&mut input)?;
let output = om_wikiparser::html::simplify(&input, &lang);
stdout().write_all(output.as_bytes())?;
Ok(())
}
}
}
/// Get the version returned by `git describe`, e.g.:
/// - `v2.0` if a git tag
@ -27,264 +110,3 @@ fn version() -> &'static str {
.or(option_env!("CARGO_PKG_VERSION"))
.unwrap_or("unknown")
}
// NOTE: clap's derive turns the `///` comments on this struct and its fields into the
// `--help` text, so they are part of the program's output.
/// Extract article HTML from Wikipedia Enterprise HTML dumps.
///
/// Expects an uncompressed dump connected to stdin.
#[derive(Parser)]
#[command(version = crate::version())]
struct Args {
/// Directory to write the extracted articles to.
output_dir: PathBuf,
/// Path to file that contains a Wikidata QID to extract on each line
/// (e.g. `Q12345`).
#[arg(long, help_heading = "FILTERS")]
wikidata_ids: Option<PathBuf>,
/// Path to file that contains a Wikipedia article url to extract on each line
/// (e.g. `https://lang.wikipedia.org/wiki/Article_Title`).
#[arg(long, help_heading = "FILTERS")]
wikipedia_urls: Option<PathBuf>,
/// Append to the provided file path the QIDs of articles matched by title but not QID.
///
/// Use this to save the QIDs of articles you know the url of, but not the QID.
/// The same path can later be passed to the `--wikidata-ids` option to extract them from another language's dump.
/// Writes are atomicly appended to the file, so the same path may be used by multiple concurrent instances.
// `requires` ties this option to `--wikipedia-urls`: new QIDs are only found through title matches.
#[arg(long, requires("wikipedia_urls"))]
write_new_ids: Option<PathBuf>,
}
/// Determine the directory to write the article contents to, create it, and create any necessary symlinks to it.
fn create_article_dir(
base: impl AsRef<Path>,
page: &Page,
redirects: impl IntoIterator<Item = WikipediaTitleNorm>,
) -> anyhow::Result<PathBuf> {
let base = base.as_ref();
let mut redirects = redirects.into_iter();
let main_dir = match page.wikidata() {
None => {
// Write to wikipedia title directory.
// Prefer first redirect, fall back to page title if none exist
info!("Page without wikidata qid: {:?} ({})", page.name, page.url);
redirects
.next()
.or_else(|| match page.title() {
Ok(title) => Some(title),
Err(e) => {
warn!("Unable to parse title for page {:?}: {:#}", page.name, e);
None
}
})
// hard fail when no titles can be parsed
.ok_or_else(|| anyhow!("No available titles for page {:?}", page.name))?
.get_dir(base.to_owned())
}
Some(qid) => {
// Otherwise use wikidata as main directory and symlink from wikipedia titles.
qid.get_dir(base.to_owned())
}
};
if main_dir.is_symlink() {
fs::remove_file(&main_dir)
.with_context(|| format!("removing old link for main directory {:?}", &main_dir))?;
}
fs::create_dir_all(&main_dir)
.with_context(|| format!("creating main directory {:?}", &main_dir))?;
// Write symlinks to main directory.
for title in redirects {
let wikipedia_dir = title.get_dir(base.to_owned());
// Build required directory.
//
// Possible states from previous run:
// - Does not exist (and is not a symlink)
// - Exists, is a directory
// - Exists, is a valid symlink to correct location
// - Exists, is a valid symlink to incorrect location
if wikipedia_dir.exists() {
if wikipedia_dir.is_symlink() {
// Only replace if not valid
if fs::read_link(&wikipedia_dir)? == main_dir {
continue;
}
fs::remove_file(&wikipedia_dir)?;
} else {
fs::remove_dir_all(&wikipedia_dir)?;
}
} else {
// titles can contain `/`, so ensure necessary subdirs exist
let parent_dir = wikipedia_dir.parent().unwrap();
fs::create_dir_all(parent_dir)
.with_context(|| format!("creating wikipedia directory {:?}", parent_dir))?;
}
unix::fs::symlink(&main_dir, &wikipedia_dir).with_context(|| {
format!(
"creating symlink from {:?} to {:?}",
wikipedia_dir, main_dir
)
})?;
}
Ok(main_dir)
}
/// Write selected article to disk.
///
/// - Write page contents to wikidata page (`wikidata.org/wiki/QXXX/lang.html`).
/// - If the page has no wikidata qid, write contents to wikipedia location (`lang.wikipedia.org/wiki/article_title/lang.html`).
/// - Create links from all wikipedia urls and redirects (`lang.wikipedia.org/wiki/a_redirect -> wikidata.org/wiki/QXXX`).
fn write(
base: impl AsRef<Path>,
page: &Page,
redirects: impl IntoIterator<Item = WikipediaTitleNorm>,
) -> anyhow::Result<()> {
let article_dir = create_article_dir(base, page, redirects)?;
// Write html to determined file.
let mut filename = article_dir;
filename.push(&page.in_language.identifier);
filename.set_extension("html");
debug!("{:?}: {:?}", page.name, filename);
if filename.exists() {
debug!("Overwriting existing file");
}
let html = simplify(&page.article_body.html, &page.in_language.identifier);
let mut file =
File::create(&filename).with_context(|| format!("creating html file {:?}", filename))?;
file.write_all(html.as_bytes())
.with_context(|| format!("writing html file {:?}", filename))?;
Ok(())
}
/// Program entry point.
///
/// Loads the wikipedia url and wikidata id filters, then reads the dump from stdin
/// one JSON page per line and writes every page matching a filter below the output
/// directory. A line that cannot be read or deserialized aborts the run; a failure
/// to write an individual article is only logged.
fn main() -> anyhow::Result<()> {
// Use info level by default, load overrides from `RUST_LOG` env variable.
// See https://docs.rs/env_logger/latest/env_logger/index.html#example
env_logger::Builder::new()
.filter_level(log::LevelFilter::Info)
.parse_default_env()
.try_init()?;
let args = Args::parse();
// With neither filter given no page could ever match, so treat it as a usage error.
if args.wikidata_ids.is_none() && args.wikipedia_urls.is_none() {
let mut cmd = Args::command();
cmd.error(
clap::error::ErrorKind::MissingRequiredArgument,
"one or both of --wikidata-ids and --wikipedia-urls is required",
)
.exit()
}
info!("{} {}", Args::command().get_name(), version());
// A filter that was not provided is left as its empty default.
let wikipedia_titles = if let Some(path) = args.wikipedia_urls {
info!("Loading article urls from {path:?}");
let urls = parse_wikipedia_file(path)?;
debug!("Parsed {} unique article urls", urls.len());
urls
} else {
Default::default()
};
let wikidata_ids = if let Some(path) = args.wikidata_ids {
info!("Loading wikidata ids from {path:?}");
let ids = parse_wikidata_file(path)?;
debug!("Parsed {} unique wikidata ids", ids.len());
ids
} else {
Default::default()
};
// NOTE: For atomic writes to the same file across threads/processes:
// - The file needs to be opened in APPEND mode (`.append(true)`).
// - Each write needs to be a single syscall (for Rust, use `format!` for formatting before calling `write!`, or `write!` to a `String` first).
// - Each write needs to be under `PIPE_BUF` size (see `man write(3)`), usually 4kb on Linux.
//
// For more information, see:
// - `man write(3posix)`: https://www.man7.org/linux/man-pages/man3/write.3p.html
// - `std::fs::OpenOptions::append`: https://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.append
// - https://stackoverflow.com/questions/1154446/is-file-append-atomic-in-unix
let mut write_new_ids = args
.write_new_ids
.as_ref()
.map(|p| File::options().create(true).append(true).open(p))
.transpose()?;
// The output directory must already exist; it is not created here.
if !args.output_dir.is_dir() {
bail!("output dir {:?} does not exist", args.output_dir)
}
info!("Processing dump");
let dump = stdin().lock();
// TODO: Compare different deserialization methods.
// The docs warn against using a reader directly, and it's slower than tar can decompress the dump.
// let stream = serde_json::Deserializer::from_reader(dump).into_iter::<Page>();
// Each line of the dump is deserialized as one `Page`.
let stream = dump.lines().map(|r| {
r.map_err(anyhow::Error::new)
.and_then(|s| serde_json::from_str::<Page>(&s).map_err(anyhow::Error::new))
});
for page in stream {
// A read or deserialization error stops processing of the whole dump.
let page = page?;
let qid = page.wikidata();
let is_wikidata_match = qid
.as_ref()
.map(|qid| wikidata_ids.contains(qid))
.unwrap_or_default();
// Skip title parsing entirely when no title filter was loaded.
let matching_titles = if wikipedia_titles.is_empty() {
Default::default()
} else {
page.all_titles()
.filter_map(|r| {
r.map(Some).unwrap_or_else(|e| {
warn!("Could not parse title for {:?}: {:#}", &page.name, e);
None
})
})
.filter(|t| wikipedia_titles.contains(t))
.collect::<Vec<_>>()
};
if !is_wikidata_match && matching_titles.is_empty() {
continue;
}
// Write matched new QIDs back to file.
if let (Some(f), Some(qid)) = (&mut write_new_ids, &qid) {
// Only QIDs found through a title match and not already in the id filter are new.
if !is_wikidata_match && !matching_titles.is_empty() {
debug!("Writing new id {} for article {:?}", qid, page.name);
// NOTE: Write to string buffer first to have a single atomic write syscall.
// See `write_new_ids` for more info.
let line = format!("{}\n", qid);
write!(f, "{}", line).with_context(|| {
format!(
"writing new id to file {:?}",
args.write_new_ids.as_ref().unwrap()
)
})?;
}
}
// A failure to write one article is logged and does not stop the run.
if let Err(e) = write(&args.output_dir, &page, matching_titles) {
error!("Error writing article {:?}: {:#}", page.name, e);
}
}
Ok(())
}

View file

@ -1,24 +1,23 @@
//! Wikimedia types
use std::{
collections::HashSet, ffi::OsStr, fmt::Display, fs, num::ParseIntError, path::PathBuf,
str::FromStr,
};
use std::{collections::HashSet, error::Error, ffi::OsStr, fmt::Display, fs, str::FromStr};
use anyhow::{anyhow, bail, Context};
use url::Url;
mod page;
pub use page::Page;
mod title;
pub use title::*;
mod qid;
pub use qid::*;
/// Read Wikidata QIDs from a file with one QID on each line.
pub fn parse_wikidata_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<WikidataQid>> {
pub fn parse_wikidata_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<Qid>> {
let contents = fs::read_to_string(path.as_ref())?;
Ok(contents
.lines()
.enumerate()
.map(|(i, line)| {
WikidataQid::from_str(line).with_context(|| {
Qid::from_str(line).with_context(|| {
let line_num = i + 1;
format!("on line {line_num}: {line:?}")
})
@ -34,15 +33,13 @@ pub fn parse_wikidata_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<Wi
}
/// Read article titles from a file of urls on each line.
pub fn parse_wikipedia_file(
path: impl AsRef<OsStr>,
) -> anyhow::Result<HashSet<WikipediaTitleNorm>> {
pub fn parse_wikipedia_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<Title>> {
let contents = fs::read_to_string(path.as_ref())?;
Ok(contents
.lines()
.enumerate()
.map(|(i, line)| {
WikipediaTitleNorm::from_url(line).with_context(|| {
Title::from_url(line).with_context(|| {
let line_num = i + 1;
format!("on line {line_num}: {line:?}")
})
@ -57,147 +54,120 @@ pub fn parse_wikipedia_file(
.collect())
}
/// Wikidata QID/Q Number
///
/// See https://www.wikidata.org/wiki/Wikidata:Glossary#QID
///
/// ```
/// use std::str::FromStr;
/// use om_wikiparser::wm::WikidataQid;
///
/// let with_q = WikidataQid::from_str("Q12345").unwrap();
/// let without_q = WikidataQid::from_str(" 12345 ").unwrap();
/// assert_eq!(with_q, without_q);
///
/// assert!(WikidataQid::from_str("q12345").is_ok());
/// assert!(WikidataQid::from_str("https://wikidata.org/wiki/Q12345").is_err());
/// assert!(WikidataQid::from_str("Article_Title").is_err());
/// assert!(WikidataQid::from_str("Q").is_err());
/// assert!(WikidataQid::from_str("").is_err());
/// ```
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct WikidataQid(u32);
pub fn parse_osm_tag_file(
path: impl AsRef<OsStr>,
qids: &mut HashSet<Qid>,
titles: &mut HashSet<Title>,
mut line_errors: Option<&mut Vec<ParseLineError>>,
) -> anyhow::Result<()> {
let path = path.as_ref();
let mut rdr = csv::ReaderBuilder::new().delimiter(b'\t').from_path(path)?;
impl FromStr for WikidataQid {
type Err = ParseIntError;
let mut push_error = |e: ParseLineError| {
debug!("Tag parse error: {e}");
if let Some(ref mut errs) = line_errors {
errs.push(e);
}
};
fn from_str(s: &str) -> Result<Self, Self::Err> {
let s = s.trim();
let s = s.strip_prefix(['Q', 'q']).unwrap_or(s);
u32::from_str(s).map(WikidataQid)
let mut qid_col = None;
let mut title_col = None;
for (column, title) in rdr.headers()?.iter().enumerate() {
match title {
"wikidata" => qid_col = Some(column),
"wikipedia" => title_col = Some(column),
_ => (),
}
}
let qid_col = qid_col.ok_or_else(|| anyhow!("Cannot find 'wikidata' column"))?;
let title_col = title_col.ok_or_else(|| anyhow!("Cannot find 'wikipedia' column"))?;
let mut row = csv::StringRecord::new();
loop {
match rdr.read_record(&mut row) {
Ok(true) => {}
// finished
Ok(false) => break,
// attempt to recover from parsing errors
Err(e) => {
if e.is_io_error() {
bail!(e)
}
push_error(ParseLineError {
text: String::new(),
line: rdr.position().line(),
kind: e.into(),
});
continue;
}
}
let qid = &row[qid_col].trim();
if !qid.is_empty() {
match Qid::from_str(qid) {
Ok(qid) => {
qids.insert(qid);
}
Err(e) => push_error(ParseLineError {
text: qid.to_string(),
line: rdr.position().line(),
kind: e.into(),
}),
}
}
let title = &row[title_col].trim();
if !title.is_empty() {
match Title::from_osm_tag(title) {
Ok(title) => {
titles.insert(title);
}
Err(e) => push_error(ParseLineError {
text: title.to_string(),
line: rdr.position().line(),
kind: e.into(),
}),
}
}
}
Ok(())
}
impl Display for WikidataQid {
/// The reason a value or record of the OSM tag file could not be parsed.
#[derive(Debug, thiserror::Error)]
pub enum ParseErrorKind {
/// A `wikipedia` column value that could not be parsed as a title.
#[error("bad title")]
Title(#[from] ParseTitleError),
/// A `wikidata` column value that could not be parsed as a QID.
#[error("bad QID")]
Qid(#[from] ParseQidError),
/// A record that the csv reader itself could not parse.
#[error("bad TSV line")]
Tsv(#[from] csv::Error),
}
/// A parse failure together with where in the OSM tag file it was recorded.
#[derive(Debug)]
pub struct ParseLineError {
/// The offending field text; empty when the whole record failed to parse.
text: String,
/// Line number taken from the csv reader's position when the error was recorded.
line: u64,
/// What went wrong.
kind: ParseErrorKind,
}
impl Display for ParseLineError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Q{}", self.0)
// write source chain to ensure they are logged
write!(f, "on line {}: {:?}: {}", self.line, self.text, self.kind)?;
let mut source = self.kind.source();
while let Some(e) = source {
write!(f, ": {}", e)?;
source = e.source();
}
Ok(())
}
}
impl WikidataQid {
    /// Directory for this QID beneath `base`: `<base>/wikidata/<QID>`.
    pub fn get_dir(&self, base: PathBuf) -> PathBuf {
        // TODO: can use as_mut_os_string with 1.70.0
        base.join("wikidata").join(self.to_string())
    }
}
/// Normalized wikipedia article title that can compare:
/// - titles `Spatial Database`
/// - urls `https://en.wikipedia.org/wiki/Spatial_database#Geodatabase`
/// - osm-style tags `en:Spatial Database`
///
/// ```
/// use om_wikiparser::wm::WikipediaTitleNorm;
///
/// let title = WikipediaTitleNorm::from_title("Article Title", "en").unwrap();
/// let url = WikipediaTitleNorm::from_url("https://en.wikipedia.org/wiki/Article_Title#Section").unwrap();
/// assert_eq!(url, title);
///
/// assert!(WikipediaTitleNorm::from_url("https://en.wikipedia.org/not_a_wiki_page").is_err());
/// assert!(WikipediaTitleNorm::from_url("https://wikidata.org/wiki/Q12345").is_err());
///
/// assert!(
/// WikipediaTitleNorm::from_url("https://de.wikipedia.org/wiki/Breil/Brigels").unwrap() !=
/// WikipediaTitleNorm::from_url("https://de.wikipedia.org/wiki/Breil").unwrap()
/// );
/// ```
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct WikipediaTitleNorm {
/// Language of the wiki (the subdomain of a `wikipedia.org` url); trimmed and never empty.
lang: String,
/// Article title, trimmed and with spaces replaced by underscores; never empty.
name: String,
}
impl WikipediaTitleNorm {
fn normalize_title(title: &str) -> String {
// TODO: Compare with map generator url creation, ensure covers all cases.
title.trim().replace(' ', "_")
}
// https://en.wikipedia.org/wiki/Article_Title/More_Title
pub fn from_url(url: &str) -> anyhow::Result<Self> {
let url = Url::parse(url.trim())?;
let (subdomain, host) = url
.host_str()
.ok_or_else(|| anyhow!("Expected host"))?
.split_once('.')
.ok_or_else(|| anyhow!("Expected subdomain"))?;
if host != "wikipedia.org" {
bail!("Expected wikipedia.org for domain")
}
let lang = subdomain;
let path = url.path();
let (root, title) = path
.strip_prefix('/')
.unwrap_or(path)
.split_once('/')
.ok_or_else(|| anyhow!("Expected at least two segments in path"))?;
if root != "wiki" {
bail!("Expected 'wiki' as root path, got: {:?}", root)
}
let title = urlencoding::decode(title)?;
Self::from_title(&title, lang)
}
// en:Article Title
fn _from_osm_tag(tag: &str) -> anyhow::Result<Self> {
let (lang, title) = tag
.trim()
.split_once(':')
.ok_or_else(|| anyhow!("Expected ':'"))?;
Self::from_title(title, lang)
}
pub fn from_title(title: &str, lang: &str) -> anyhow::Result<Self> {
let title = title.trim();
let lang = lang.trim();
if title.is_empty() {
bail!("title cannot be empty or whitespace");
}
if lang.is_empty() {
bail!("lang cannot be empty or whitespace");
}
let name = Self::normalize_title(title);
let lang = lang.to_owned();
Ok(Self { name, lang })
}
pub fn get_dir(&self, base: PathBuf) -> PathBuf {
let mut path = base;
// TODO: can use as_mut_os_string with 1.70.0
path.push(format!("{}.wikipedia.org", self.lang));
path.push("wiki");
path.push(&self.name);
path
// Marks `ParseLineError` as a standard error type.
// Its `Display` impl already writes the full chain of underlying causes, so no
// source is exposed here (presumably to keep the chain from being printed twice).
impl Error for ParseLineError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
// return nothing b/c Display prints source chain
None
}
}

View file

@ -1,8 +1,9 @@
use std::{iter, str::FromStr};
use anyhow::Context;
use serde::Deserialize;
use super::{WikidataQid, WikipediaTitleNorm};
use super::{Qid, Title};
// TODO: consolidate into single struct
/// Deserialized Wikimedia Enterprise API Article
@ -25,27 +26,29 @@ pub struct Page {
}
impl Page {
pub fn wikidata(&self) -> Option<WikidataQid> {
pub fn wikidata(&self) -> Option<Qid> {
// TODO: return error
self.main_entity
.as_ref()
.map(|e| WikidataQid::from_str(&e.identifier).unwrap())
.map(|e| Qid::from_str(&e.identifier).unwrap())
}
/// Title of the article
pub fn title(&self) -> anyhow::Result<WikipediaTitleNorm> {
WikipediaTitleNorm::from_title(&self.name, &self.in_language.identifier)
pub fn title(&self) -> anyhow::Result<Title> {
Title::from_title(&self.name, &self.in_language.identifier)
.with_context(|| format!("bad title {:?}", self.name))
}
/// All titles that lead to the article, the main title followed by any redirects.
pub fn all_titles(&self) -> impl Iterator<Item = anyhow::Result<WikipediaTitleNorm>> + '_ {
pub fn all_titles(&self) -> impl Iterator<Item = anyhow::Result<Title>> + '_ {
iter::once(self.title()).chain(self.redirects())
}
pub fn redirects(&self) -> impl Iterator<Item = anyhow::Result<WikipediaTitleNorm>> + '_ {
self.redirects
.iter()
.map(|r| WikipediaTitleNorm::from_title(&r.name, &self.in_language.identifier))
pub fn redirects(&self) -> impl Iterator<Item = anyhow::Result<Title>> + '_ {
self.redirects.iter().map(|r| {
Title::from_title(&r.name, &self.in_language.identifier)
.with_context(|| format!("bad redirect {:?}", self.name))
})
}
}

64
src/wm/qid.rs Normal file
View file

@ -0,0 +1,64 @@
use std::{error::Error, fmt::Display, num::ParseIntError, path::PathBuf, str::FromStr};
/// Wikidata QID/Q Number
///
/// See https://www.wikidata.org/wiki/Wikidata:Glossary#QID
///
/// ```
/// use std::str::FromStr;
/// use om_wikiparser::wm::Qid;
///
/// let with_q = Qid::from_str("Q12345").unwrap();
/// let without_q = Qid::from_str(" 12345 ").unwrap();
/// assert_eq!(with_q, without_q);
///
/// assert!(Qid::from_str("q12345").is_ok());
/// assert!(Qid::from_str("https://wikidata.org/wiki/Q12345").is_err());
/// assert!(Qid::from_str("Article_Title").is_err());
/// assert!(Qid::from_str("Q").is_err());
/// assert!(Qid::from_str("").is_err());
/// ```
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct Qid(u32);
impl FromStr for Qid {
type Err = ParseQidError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let s = s.trim();
let s = s.strip_prefix(['Q', 'q']).unwrap_or(s);
u32::from_str(s).map(Qid).map_err(ParseQidError)
}
}
impl Display for Qid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Q{}", self.0)
}
}
impl Qid {
pub fn get_dir(&self, base: PathBuf) -> PathBuf {
let mut path = base;
path.push("wikidata");
// TODO: can use as_mut_os_string with 1.70.0
path.push(self.to_string());
path
}
}
/// Error returned when a string cannot be parsed as a [`Qid`].
///
/// Transparent wrapper around the integer parse error: both the message and the
/// source are forwarded from the wrapped error.
#[derive(Debug, PartialEq, Eq)]
pub struct ParseQidError(ParseIntError);

impl Display for ParseQidError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Forward to the wrapped error so the formatter options are honoured the same way.
        Display::fmt(&self.0, f)
    }
}

impl Error for ParseQidError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Error::source(&self.0)
    }
}

150
src/wm/title.rs Normal file
View file

@ -0,0 +1,150 @@
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
use std::{fmt::Display, path::PathBuf, string::FromUtf8Error};
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
use url::Url;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Normalized wikipedia article title that can compare:
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// - titles `Spatial Database`
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// - urls `https://en.wikipedia.org/wiki/Spatial_database#Geodatabase`
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// - osm-style tags `en:Spatial Database`
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
///
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// ```
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// use om_wikiparser::wm::Title;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
///
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// let title = Title::from_title("Article Title", "en").unwrap();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// let url = Title::from_url("https://en.wikipedia.org/wiki/Article_Title#Section").unwrap();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// let mobile = Title::from_url("https://en.m.wikipedia.org/wiki/Article_Title#Section").unwrap();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// let url_tag1 = Title::from_osm_tag("https://en.m.wikipedia.org/wiki/Article_Title#Section").unwrap();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// let url_tag2 = Title::from_osm_tag("de:https://en.m.wikipedia.org/wiki/Article_Title#Section").unwrap();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert_eq!(url, title);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert_eq!(url, mobile);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert_eq!(url, url_tag1);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert_eq!(url, url_tag2);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
///
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert!(Title::from_url("https://en.wikipedia.org/not_a_wiki_page").is_err());
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert!(Title::from_url("https://wikidata.org/wiki/Q12345").is_err());
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
///
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// assert!(
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Title::from_url("https://de.wikipedia.org/wiki/Breil/Brigels").unwrap() !=
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Title::from_url("https://de.wikipedia.org/wiki/Breil").unwrap()
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// );
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// ```
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
pub struct Title {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
lang: String,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
name: String,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
impl Display for Title {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
write!(f, "{}:{}", self.lang, self.name)
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
impl Title {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
fn normalize_title(title: &str) -> String {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// TODO: Compare with the map generator's URL creation to ensure this covers all cases.
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
title.trim().replace(' ', "_")
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// https://en.wikipedia.org/wiki/Article_Title/More_Title
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
pub fn from_url(url: &str) -> Result<Self, ParseTitleError> {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let url = Url::parse(url.trim())?;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let (subdomain, host) = url
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.host_str()
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.ok_or(ParseTitleError::NoHost)?
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.split_once('.')
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.ok_or(ParseTitleError::NoSubdomain)?;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let host = host.strip_prefix("m.").unwrap_or(host);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if host != "wikipedia.org" {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Err(ParseTitleError::BadDomain);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let lang = subdomain;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let path = url.path();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let (root, title) = path
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.strip_prefix('/')
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.unwrap_or(path)
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.split_once('/')
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.ok_or(ParseTitleError::ShortPath)?;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if root != "wiki" {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Err(ParseTitleError::BadPath);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let title = urlencoding::decode(title)?;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
Self::from_title(&title, lang)
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// en:Article Title
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
pub fn from_osm_tag(tag: &str) -> Result<Self, ParseTitleError> {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let (lang, title) = tag
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.trim()
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.split_once(':')
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
.ok_or(ParseTitleError::MissingColon)?;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let lang = lang.trim_start();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let title = title.trim_start();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if matches!(lang, "http" | "https") {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Self::from_url(tag);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if title.starts_with("http://") || title.starts_with("https://") {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Self::from_url(title);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
Self::from_title(title, lang)
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
pub fn from_title(title: &str, lang: &str) -> Result<Self, ParseTitleError> {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let title = title.trim();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let lang = lang.trim();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if title.is_empty() {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Err(ParseTitleError::NoTitle);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
if lang.is_empty() {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
return Err(ParseTitleError::NoLang);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let name = Self::normalize_title(title);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
let lang = lang.to_owned();
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
Ok(Self { name, lang })
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Builds the directory path for this title underneath `base`.
///
/// `base` is taken by value and extended in place with three components,
/// producing `<base>/<lang>.wikipedia.org/wiki/<name>`.
pub fn get_dir(&self, base: PathBuf) -> PathBuf {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// Reuse the caller's buffer instead of allocating a new `PathBuf`.
let mut path = base;
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// TODO: can use as_mut_os_string with 1.70.0
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// First component: the per-language host name.
path.push(format!("{}.wikipedia.org", self.lang));
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
path.push("wiki");
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// NOTE(review): `PathBuf::push` replaces the whole path when handed an
// absolute component — confirm `name` can never begin with a path separator.
path.push(&self.name);
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
path
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Reasons why parsing a Wikipedia title or article URL can fail.
///
/// The first three variants describe problems with the language/title pair;
/// the variants after the `url-specific` marker are only relevant to URL input.
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
pub enum ParseTitleError {
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The title part is empty or contains only whitespace.
#[error("title cannot be empty or whitespace")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
NoTitle,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The language part is empty or contains only whitespace.
#[error("lang cannot be empty or whitespace")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
NoLang,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The input has no `:` between the language and the title.
#[error("no ':' separating lang and title")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
MissingColon,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
// url-specific
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The input could not be parsed as a URL; wraps the `url` crate's error.
#[error("cannot parse url")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
Url(#[from] url::ParseError),
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// Decoding the URL failed; wraps the underlying `FromUtf8Error`.
#[error("cannot decode url")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
UrlDecode(#[from] FromUtf8Error),
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The URL has no host component.
#[error("no host in url")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
NoHost,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The URL's host has no subdomain.
/// NOTE(review): presumably the subdomain carries the language code — confirm against the parser.
#[error("no subdomain in url")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
NoSubdomain,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The URL's base domain is something other than `wikipedia.org`.
// The message previously said "is wikipedia.org", the opposite of the error condition.
#[error("url base domain is not wikipedia.org")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
BadDomain,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The URL's path does not start under `/wiki/`.
#[error("url base path is not /wiki/")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
BadPath,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
/// The URL's path has fewer than two segments.
#[error("path has less than 2 segments")]
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
ShortPath,
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string
}
biodranik commented 2023-08-09 21:29:57 +00:00 (Migrated from github.com)
Review

Does it make sense to print wrong hosts in a log to fix/support them?

Does it make sense to print wrong hosts in a log to fix/support them?
biodranik commented 2023-08-09 21:30:14 +00:00 (Migrated from github.com)
Review

ditto

ditto
newsch commented 2023-08-09 22:22:04 +00:00 (Migrated from github.com)
Review

They are caught at a higher level and logged/saved with the full string

They are caught at a higher level and logged/saved with the full string