Add script for running with map generator #21
6
.github/workflows/rust-checks.yml
vendored
|
@ -3,9 +3,9 @@ name: Rust Checks
|
|||
on:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- .gitignore
|
||||
- LICENSE
|
||||
- README.md
|
||||
- .gitignore
|
||||
- LICENSE
|
||||
- README.md
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
|
18
.github/workflows/shell-checks.yml
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
name: Shell Checks
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "**.sh"
|
||||
|
||||
jobs:
|
||||
test:
|
||||
name: shellcheck
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Run shellcheck
|
||||
run: |
|
||||
shellcheck --version
|
||||
shellcheck -x *.sh
|
13
README.md
|
@ -2,6 +2,9 @@
|
|||
|
||||
_Extracts articles from [Wikipedia database dumps](https://en.wikipedia.org/wiki/Wikipedia:Database_download) for embedding into the `mwm` map files created by [the Organic Maps generator](https://github.com/organicmaps/organicmaps/blob/master/tools/python/maps_generator/README.md)._
|
||||
|
||||
Extracted articles are identified by Wikipedia article titles in url or text form (language-specific), and [Wikidata QIDs](https://www.wikidata.org/wiki/Wikidata:Glossary#QID) (language-agnostic).
|
||||
OpenStreetMap commonly stores these as [`wikipedia*=`](https://wiki.openstreetmap.org/wiki/Key:wikipedia) and [`wikidata=`](https://wiki.openstreetmap.org/wiki/Key:wikidata) tags on objects.
|
||||
|
||||
## Configuring
|
||||
|
||||
[`article_processing_config.json`](article_processing_config.json) should be updated when adding a new language.
|
||||
|
@ -9,6 +12,11 @@ It defines article sections that are not important for users and should be remov
|
|||
|
||||
## Usage
|
||||
|
||||
To use with the map generator, see the [`run.sh` script](run.sh) and its own help documentation.
|
||||
It handles preparing the inputs, using multiple dumps, and re-running to convert titles to QIDs and extract them across languages.
|
||||
|
||||
To run the wikiparser manually or for development, see below.
|
||||
|
||||
First, install [the rust language tools](https://www.rust-lang.org/)
|
||||
|
||||
For best performance, use `--release` when building or running.
|
||||
|
@ -19,7 +27,7 @@ Alternatively, build it with `cargo build --release`, which places the binary in
|
|||
|
||||
Run the program with the `--help` flag to see all supported arguments.
|
||||
|
||||
```shell
|
||||
```
|
||||
$ cargo run --release -- --help
|
||||
Extract article HTML from Wikipedia Enterprise HTML dumps.
|
||||
|
||||
|
@ -57,10 +65,11 @@ It takes as inputs:
|
|||
- A file of Wikipedia article titles to extract, one per line (e.g. `https://$LANG.wikipedia.org/wiki/$ARTICLE_TITLE`), passed as a CLI flag `--wikipedia-urls`.
|
||||
- A directory to write the extracted articles to, as a CLI argument.
|
||||
|
||||
As an example of usage with the map generator:
|
||||
As an example of manual usage with the map generator:
|
||||
- Assuming this program is installed to `$PATH` as `om-wikiparser`.
|
||||
- Download [the dumps in the desired languages](https://dumps.wikimedia.org/other/enterprise_html/runs/) (Use the files with the format `${LANG}wiki-NS0-${DATE}-ENTERPRISE-HTML.json.tar.gz`).
|
||||
Set `DUMP_DOWNLOAD_DIR` to the location they are downloaded.
|
||||
- Run a maps build with descriptions enabled to generate the `id_to_wikidata.csv` and `wiki_urls.txt` files.
|
||||
- Run the following from within the `intermediate_data` subdirectory of the maps build directory:
|
||||
|
||||
```shell
|
||||
|
|
32
build.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use std::process::Command;
|
||||
|
||||
/// Pass git-describe through CARGO_GIT_VERSION env variable
|
||||
![]() I've found it useful in the past to embed the git commit in the binary so that when I'm looking through logs I can tell what version was running.
I can remove it if you don't think it's useful.
|
||||
///
|
||||
/// NOTE: Cargo.toml still needs to be updated on releases
|
||||
fn set_version_from_git() {
    // Run `git describe --always --dirty --tags`; treat both a spawn error and a
    // non-zero exit status as "no version available".
    let described = Command::new("git")
        .args(&["describe", "--always", "--dirty", "--tags"])
        .output()
        .ok()
        .filter(|out| out.status.success());

    if let Some(out) = described {
        // Lossy conversion: invalid UTF-8 in git's output is replaced, never fatal.
        let raw = String::from_utf8_lossy(&out.stdout);
        println!("cargo:rustc-env=CARGO_GIT_VERSION={}", raw.trim());

        // Re-run this build script when another ref is checked out or any ref moves.
        println!("cargo:rerun-if-changed=.git/refs/");
        println!("cargo:rerun-if-changed=.git/HEAD");
    } else {
        // Builds from a crates.io package have no git checkout, so this is not an error.
        eprintln!("git describe failed; ignoring");
    }
}
|
||||
|
||||
fn main() {
|
||||
set_version_from_git();
|
||||
}
|
7
lib.sh
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Shared functions for scripts
|
||||
# shellcheck shell=bash
|
||||
|
||||
# Write message to stderr with a timestamp and line ending.
|
||||
log () {
    # Usage: log MESSAGE...
    # Prints "<timestamp> MESSAGE..." followed by a newline on stderr.
    #
    # `date -u` is required: the format string ends in a literal "Z", which
    # designates UTC. Without -u the local time would be printed but labelled
    # as UTC.
    # `echo -e` expands backslash escapes (e.g. \n, \t) in the message arguments.
    echo -e "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$@" >&2
}
|
152
run.sh
Executable file
|
@ -0,0 +1,152 @@
|
|||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
#! /usr/bin/env bash
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# shellcheck disable=SC2016 # Backticks not used as expansions in documentation.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
USAGE='Usage: ./run.sh [-h] <BUILD_DIR> <DUMP_FILE.json.tar.gz> [<DUMP_FILE.json.tar.gz>...]
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
A convenience script to run the wikiparser with the maps generator as a drop-in replacement for the descriptions scraper.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
Arguments:
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
<BUILD_DIR> An existing directory to place descriptions in.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
The `id_to_wikidata.csv` and `wiki_urls.txt` files output by the
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
maps generator must be placed in this directory before running.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
The extracted articles will be placed in a `descriptions`
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
subdirectory within this directory.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
The `intermediate_data` subfolder of a maps build directory may
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
be used for this. The same folder may be used for multiple runs.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
<DUMP_FILE> A wikipedia enterprise html dump. These take the form of
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
`enwiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz`. Multiple
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
dumps in the same language SHOULD NOT be provided, and will
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
result in inconsistent data.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the less-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
Options:
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the less-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
-h Print this help screen
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the less-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the less-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
1. Builds wikiparser.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the less-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
2. Extracts wikidata ids and wikipedia urls from generator intermediate files `id_to_wikidata.csv` and `wiki_urls.txt`.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred over the less well-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
3. Runs wikiparser in parallel for all input dump files (NOTE: this currently starts 2 processes for each dump file).
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred over the less well-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred over the less well-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
For information on running the wikiparser manually, see README.md.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred over the less well-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
For more information on the map generator, see
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
<https://github.com/organicmaps/organicmaps/blob/b52b42bd746fdb8daf05cc048f0b22654cfb9b8e/tools/python/maps_generator/README.md>.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
'
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
set -euo pipefail
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# set -x
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Parse options.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy article extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe the ideal workflow would be to start wikiparser in parallel with the map generation and make the generator aware of when wikiparser finishes (assuming that it finishes faster than the generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe the ideal workflow would be to start wikiparser in parallel with the map generation and make the generator aware of when wikiparser finishes (assuming that it finishes faster than the generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
while getopts "h" opt
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
do
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
case $opt in
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
h) echo -n "$USAGE" >&2; exit 0;;
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
?) echo "$USAGE" | head -n1 >&2; exit 1;;
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
esac
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
done
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
shift $((OPTIND - 1))
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ -z "${2-}" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo "BUILD_DIR and at least one DUMP_FILE are required" >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo -n "$USAGE" >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
exit 1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Process and canonicalize all path arguments before changing directories.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
BUILD_DIR=$(readlink -f -- "$1")
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't set already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
shift
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't set already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ ! -d "$BUILD_DIR" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't set already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo "BUILD_DIR '$BUILD_DIR' does not exist or is not a directory" >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't set already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
exit 1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
DUMP_FILES=()
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
while (( $# > 0 )); do
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
dump_file="$(readlink -f -- "$1")"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ ! -f "$dump_file" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo "DUMP_FILE '$dump_file' does not exist or is not a file" >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
exit 1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the -x echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format strings.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format strings.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
DUMP_FILES+=("$dump_file")
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format strings.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
shift
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format strings.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
done
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If `-x` echoing doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Ensure we're running in the directory of this script.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If `-x` echoing doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
SCRIPT_PATH=$(dirname "$0")
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If `-x` echoing doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
cd "$SCRIPT_PATH"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If `-x` echoing doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
SCRIPT_PATH=$(pwd)
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# only load library after changing to script directory
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
source lib.sh
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Using maps build directory '$BUILD_DIR'"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if ! command -v "cargo" > /dev/null; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo -e "'cargo' is not installed, cannot build wikiparser.\nSee <https://www.rust-lang.org/>." >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy article extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
exit 1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in it's current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in it's current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in it's current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in it's current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Building wikiparser"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
cargo build --release
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
wikiparser=$(pwd)/target/release/om-wikiparser
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Changing to maps build dir '$BUILD_DIR'"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use GNU `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using `&` and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
cd "$BUILD_DIR"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]()
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Transforming intermediate generator data"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
for intermediate_file in id_to_wikidata.csv wiki_urls.txt; do
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ ! -e "$intermediate_file" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that the generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start the generator and wikiparser in parallel, as soon as a new OSM planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe the ideal workflow would be to start wikiparser in parallel with the map generation and make the generator aware of when wikiparser finishes (assuming that it finishes before the generator requires the wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
echo -e "Cannot find intermediate generator file '$intermediate_file' in maps build dir '$BUILD_DIR/'\nWas the descriptions step run?" >&2
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
exit 1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
done
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
cut -f 2 id_to_wikidata.csv > wikidata_ids.txt
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
tail -n +2 wiki_urls.txt | cut -f 3 > wikipedia_urls.txt
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon form.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Enable backtraces in errors and panics.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` trace output doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon form.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
export RUST_BACKTRACE=1
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` trace output doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon form.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Set log level.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` trace output doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon form.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
export RUST_LOG=om_wikiparser=info
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Begin extraction.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
OUTPUT_DIR=$(pwd)/descriptions
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ ! -e "$OUTPUT_DIR" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 `log` is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
mkdir "$OUTPUT_DIR"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 `log` is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 `log` is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Extracting articles to '$OUTPUT_DIR'"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 `log` is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
kill_jobs() {
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
pids=$(jobs -p)
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
if [ -n "$pids" ]; then
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Killing background jobs"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# shellcheck disable=SC2086 # PIDs are intentionally expanded.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon (`:`) builtin.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
kill $pids
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon (`:`) builtin.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Waiting for background jobs to stop"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon (`:`) builtin.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
wait
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon (`:`) builtin.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
fi
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
}
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
trap 'kill_jobs' SIGINT SIGTERM EXIT
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, for a better understanding of what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
for dump in "${DUMP_FILES[@]}"; do
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Extracting '$dump'"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
tar xzOf "$dump" | "$wikiparser" \
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's format string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
--wikidata-ids wikidata_ids.txt \
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon idiom.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that the generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start the generator and wikiparser in parallel, as soon as a new OSM planet dump is available (or maybe start wikiparser before the generator). Then, when the generator needs descriptions, they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. Line 25 is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use GNU `parallel` or something else here instead of a serial for loop?
![]() Maybe the ideal workflow would be to start wikiparser in parallel with the map generation and make the generator aware of when wikiparser finishes (assuming that it finishes before the generator requires the wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using `&` and `wait` at the end be enough?
![]() That works as long as the machine it's running on has enough cores: fine for the maps server, but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
--wikipedia-urls wikipedia_urls.txt \
|
||||
![]()
```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` trace output doesn't hurt, it can always be enabled, to make it clearer what is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() Ditto: why is printf better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is specific to bash; I wrote this as a POSIX sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. Line 25 is a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
--write-new-ids new_qids.txt \
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in it's current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in it's current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
"$OUTPUT_DIR" &
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in it's current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in it's current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
done
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in it's current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in it's current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use GNU `parallel` or something else here instead of a serial for loop?
![]() Maybe the ideal workflow would be to start wikiparser in parallel with the map generation and make the generator aware of when wikiparser finishes (assuming that it finishes before the generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using `&` and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores; that's fine for the maps server, but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core.
|
||||
|
||||
![]()
```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echo doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here?
![]() Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why is printf better than echo?
![]() It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't already set; the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
wait
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Beginning extraction of discovered QIDs"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If the `-x` echoing doesn't hurt, then it can always be used, to better understand what magic is going on under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it isn't set already; the colon is a builtin no-op, so the expansion is evaluated but its result is not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. The more readable `if` form is preferred to the lesser-known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, `:-` should work.
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
# Extract new qids from other dumps in parallel.
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
for dump in "${DUMP_FILES[@]}"; do
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
tar xzOf "$dump" | "$wikiparser" \
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT+}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's formatting string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
--wikidata-ids new_qids.txt \
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
"$OUTPUT_DIR" &
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
done
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it is a wrapper around printf, so any call to log can use printf's formatting string
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
wait
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR+}" ]; then
```
![]() Why is the colon needed? What is the purpose of this line? Why is the colon needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
||||
log "Finished"
|
||||
![]()
Checking pipe failures helps. ```suggestion
set -euxo pipefail
```
Checking pipe failures helps.
If -x echo doesn't hurt, then it can be always used, for better understanding what magic is going under the hood.
![]() Is it a copy-paste between scripts that can be reused? :) Is it a copy-paste between scripts that can be reused? :)
![]() Can echo be used here? Can echo be used here?
![]() Why is it better than echo? P.S. This and the previous comment are also related to another script. Why is it better than echo?
P.S. This and the previous comment are also related to another script.
![]() nit: here and in other places
nit: here and in other places
```suggestion
if [ -z "${MAPS_DIR:-}" ]; then
```
![]() Why colon is needed? What is the purpose of this line? Why colon is needed? What is the purpose of this line?
![]() ditto: why printf is better than echo? ditto: why printf is better than echo?
![]() It would be great to clarify why the latest map build is needed at all. It would be great to clarify why the latest map build is needed at all.
![]() Am I correctly understanding the issue with the current approach?
Am I correctly understanding the issue with the current approach?
1. Generator builds maps and creates csv/txt wiki ids files.
2. Wikiparser runs and generates articles.
3. Generator should be run again to reuse generated articles?..
![]()
`pipefail` is unique to bash, I wrote this as a posix sh script. Happy to switch if you'd rather use bash.
![]() Yes, should I put this in a third file and Yes, should I put this in a third file and `source` it?
![]() I used printf because echo doesn't handle escaped characters like I used printf because echo doesn't handle escaped characters like `\t` and `\n` in a portable way, and you can format numbers and other things nicely.
![]() It sets It could be replaced with:
It sets `MAPS_BUILD_ROOT` to `~/maps_build` if it doesn't exist already, the colon is a builtin no-op so that the expansion is evaluated but not used.
It could be replaced with:
```sh
if [ -z "${MAPS_BUILD_ROOT:-}" ]; then
MAPS_BUILD_ROOT="$HOME/maps_build"
fi
```
![]() To replace the workflow with the scraper:
So if you'd like to do it in one run, we could update the generator to call wikiparser. To replace the workflow with the scraper:
1. The generator needs to be run at least to the ["Features" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L116) in order to generate the wiki files.
2. Running Wikiparser is a replacement for the ["DownloadDescriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L144)
3. The generator can now be run from the ["Descriptions" stage](https://github.com/organicmaps/organicmaps/blob/a1b596bdc64ed5db3eabf6b1e331411aa2e4ab03/tools/python/maps_generator/generator/stages_declaration.py#L292).
So if you'd like to do it in one run, we could update the generator to call wikiparser.
Or we could tweak the generator to only output the descriptions and continue so you can run wikiparser out-of-band.
![]() Bash is the default shell used in many companies. It is a good practice to write bash scripts and use bash features.
Bash is the default shell used in [many companies](https://google.github.io/styleguide/shellguide.html). It is a good practice to write bash scripts and use bash features.
`#!/usr/bin/env bash`
![]() Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts. Yes, there is nothing wrong with that approach to avoid copy-paste. Imagine you'll introduce a third script, or split your current one into parts.
![]()
1. Do we really expect newlines and tabs in logs? Is it good practice?
2. What kind of number formatting happens in this line?
![]()
1. More readable if form is preferred to the less known colon.
2. Will `FOO="${FOO:-default value}"` work here?
![]() Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available. WDYT? Updating the generator to call wikiparser means that generator should wait until wikiparser finishes, right? This approach may be a good temporary start, but we aim to speed up the map generation process as much as possible. That's why the ideal solution would (likely) be to start generator and wikiparser in parallel, as soon as a new osm planet dump is available (or maybe start wikiparser before the generator). So when the generator needs descriptions they will already be available.
That's why it was important to focus on speedy articles extraction/processing from the start.
WDYT?
![]() Yes, Yes, `:-` should work
![]() Yes, it replaces the blocking scraper script in its current form. In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel. Yes, it replaces the blocking scraper script in its current form.
I thought it would be better to start with this working and then separate and speed up the process.
To separate fully from the generator we need to finish #19.
In the meantime we could also use the outputs from an old map build and run the wikiparser ahead of time/in parallel.
![]()
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant. 1. I think newlines and tabs are helpful for separating long content, but I don't think it's a necessity here.
2. On line 25 it's a wrapper around printf, so any call to log can use printf's format string.
In other places I've heard printf recommended over echo, but if we're using bash explicitly then the portability concerns are not relevant.
![]() @biodranik do you want to use gnu @biodranik do you want to use gnu `parallel` or something else here instead of a serial for loop?
![]() Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles). If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway. Maybe ideal workflow would be to start wikiparser in parallel with the map generation and make generator aware of when wikiparser finishes (assuming that it finishes faster than generator requires wiki articles).
If wikiparser takes a lot of time, then it's better to run it out of band in advance, considering that its data is rarely updated anyway.
![]() Won't using & and Won't using & and `wait` at the end be enough?
![]() As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use As long as the machine it's running on has enough cores, fine for the maps server but it will bog down on my laptop. I'll use `&` for now.
![]() You may add an option to use only one core. You may add an option to use only one core.
|
17
src/html.rs
|
@ -22,6 +22,19 @@ static CONFIG: Lazy<Config<'static>> = Lazy::new(|| {
|
|||
static HEADERS: Lazy<Selector> =
|
||||
Lazy::new(|| Selector::parse("h1, h2, h3, h4, h5, h6, h7").unwrap());
|
||||
|
||||
/// Elements that should always be kept, regardless of other metrics.
|
||||
static ELEMENT_ALLOW_LIST: Lazy<Selector> = Lazy::new(|| {
|
||||
Selector::parse(
|
||||
&[
|
||||
// Meta tags that affect rendering.
|
||||
"head > meta[charset]",
|
||||
"head > meta[http-equiv]",
|
||||
]
|
||||
.join(", "),
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
pub fn simplify(html: &str, lang: &str) -> String {
|
||||
let mut document = Html::parse_document(html);
|
||||
|
||||
|
@ -53,8 +66,6 @@ pub fn simplify(html: &str, lang: &str) -> String {
|
|||
}
|
||||
|
||||
remove_ids(&mut document, to_remove.drain(..));
|
||||
} else {
|
||||
warn!("No sections to remove configured for lang {lang:?}");
|
||||
}
|
||||
|
||||
for el in document
|
||||
|
@ -62,7 +73,7 @@ pub fn simplify(html: &str, lang: &str) -> String {
|
|||
.descendants()
|
||||
.filter_map(ElementRef::wrap)
|
||||
{
|
||||
if is_image(&el) || is_empty_or_whitespace(&el) {
|
||||
if (is_image(&el) || is_empty_or_whitespace(&el)) && !ELEMENT_ALLOW_LIST.matches(&el) {
|
||||
to_remove.push(el.id());
|
||||
}
|
||||
}
|
||||
|
|
33
src/main.rs
|
@ -15,11 +15,24 @@ use om_wikiparser::{
|
|||
wm::{parse_wikidata_file, parse_wikipedia_file, Page, WikipediaTitleNorm},
|
||||
};
|
||||
|
||||
/// Get the version returned by `git describe`, e.g.:
|
||||
/// - `v2.0` if a git tag
|
||||
/// - the commit hash `034ac04` if not a tag
|
||||
/// - `034ac04-dirty` if uncommitted changes are present,
|
||||
/// or the crate version if not available (if installed from crates.io).
|
||||
///
|
||||
/// See `build.rs` file for more info.
|
||||
fn version() -> &'static str {
|
||||
option_env!("CARGO_GIT_VERSION")
|
||||
.or(option_env!("CARGO_PKG_VERSION"))
|
||||
.unwrap_or("unknown")
|
||||
}
|
||||
|
||||
/// Extract article HTML from Wikipedia Enterprise HTML dumps.
|
||||
///
|
||||
/// Expects an uncompressed dump connected to stdin.
|
||||
#[derive(Parser)]
|
||||
#[command(version)]
|
||||
#[command(version = crate::version())]
|
||||
struct Args {
|
||||
/// Directory to write the extracted articles to.
|
||||
output_dir: PathBuf,
|
||||
|
@ -38,6 +51,7 @@ struct Args {
|
|||
///
|
||||
/// Use this to save the QIDs of articles you know the url of, but not the QID.
|
||||
/// The same path can later be passed to the `--wikidata-ids` option to extract them from another language's dump.
|
||||
/// Writes are atomically appended to the file, so the same path may be used by multiple concurrent instances.
|
||||
#[arg(long, requires("wikipedia_urls"))]
|
||||
write_new_ids: Option<PathBuf>,
|
||||
}
|
||||
|
@ -173,6 +187,8 @@ fn main() -> anyhow::Result<()> {
|
|||
.exit()
|
||||
}
|
||||
|
||||
info!("{} {}", Args::command().get_name(), version());
|
||||
|
||||
let wikipedia_titles = if let Some(path) = args.wikipedia_urls {
|
||||
info!("Loading article urls from {path:?}");
|
||||
let urls = parse_wikipedia_file(path)?;
|
||||
|
@ -191,6 +207,15 @@ fn main() -> anyhow::Result<()> {
|
|||
Default::default()
|
||||
};
|
||||
|
||||
// NOTE: For atomic writes to the same file across threads/processes:
|
||||
// - The file needs to be opened in APPEND mode (`.append(true)`).
|
||||
// - Each write needs to be a single syscall (for Rust, use `format!` for formatting before calling `write!`, or `write!` to a `String` first).
|
||||
// - Each write needs to be under `PIPE_BUF` size (see `man write(3)`), usually 4kb on Linux.
|
||||
//
|
||||
// For more information, see:
|
||||
// - `man write(3posix)`: https://www.man7.org/linux/man-pages/man3/write.3p.html
|
||||
// - `std::fs::OpenOptions::append`: https://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.append
|
||||
// - https://stackoverflow.com/questions/1154446/is-file-append-atomic-in-unix
|
||||
let mut write_new_ids = args
|
||||
.write_new_ids
|
||||
.as_ref()
|
||||
|
@ -240,10 +265,14 @@ fn main() -> anyhow::Result<()> {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Write matched new QIDs back to file.
|
||||
if let (Some(f), Some(qid)) = (&mut write_new_ids, &qid) {
|
||||
if !is_wikidata_match && !matching_titles.is_empty() {
|
||||
debug!("Writing new id {} for article {:?}", qid, page.name);
|
||||
writeln!(f, "{}", qid).with_context(|| {
|
||||
// NOTE: Write to string buffer first to have a single atomic write syscall.
|
||||
// See `write_new_ids` for more info.
|
||||
let line = format!("{}\n", qid);
|
||||
write!(f, "{}", line).with_context(|| {
|
||||
format!(
|
||||
"writing new id to file {:?}",
|
||||
args.write_new_ids.as_ref().unwrap()
|
||||
|
|
Why is it needed?