From 61b1f47dcb96b38b20a01634ad91e1111c9ec7cf Mon Sep 17 00:00:00 2001 From: Maciej Sikorski Date: Fri, 2 Sep 2022 22:26:31 +0200 Subject: [PATCH] [transit] Added optional argument to download GTFS data only from needed countries (#3262) * [transit] Added argument to download mdb data only for desired countries Optional argument to only download GTFS data from selected countries. Might help to avoid dealing with large datasets (~35GB). Works only for MobilityDatabase, as its .csv file contains a country code for each item. Signed-off-by: Maciej Sikorski * [transit] Added argument to download mdb data only for desired countries Optional argument to only download GTFS data from selected countries. Might help to avoid dealing with large datasets (~35GB). Works only for MobilityDatabase, as its .csv file contains a country code for each item. Signed-off-by: Maciej Sikorski * Update download_gtfs.py Signed-off-by: Maciej Sikorski * Update download_gtfs.py Signed-off-by: Maciej Sikorski Signed-off-by: Maciej Sikorski --- tools/python/transit/gtfs/download_gtfs.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/tools/python/transit/gtfs/download_gtfs.py b/tools/python/transit/gtfs/download_gtfs.py index 3e54c24f53..e87eee5593 100644 --- a/tools/python/transit/gtfs/download_gtfs.py +++ b/tools/python/transit/gtfs/download_gtfs.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """Parses GTFS feeds urls: https://transit.land/ - Transitland https://storage.googleapis.com/storage/v1/b/mdb-csv/o/sources.csv?alt=media @@ -45,13 +46,17 @@ def download_gtfs_sources_mobilitydb(path): f"HTTP error {http_err} downloading zip from {URL_MOBILITYDB_GTFS_SOURCE}") -def get_gtfs_urls_mobilitydb(path): +def get_gtfs_urls_mobilitydb(path, countries_list): """Extracts the feed urls from the downloaded csv file""" + download_from_all_countries = True + if countries_list: + download_from_all_countries = False + download_gtfs_sources_mobilitydb(path) file = 
open(os.path.join(path, RAW_FILE_MOBILITYDB), encoding='UTF-8') raw_sources = csv.DictReader(file) next(raw_sources) - urls = [field["urls.direct_download"] for field in raw_sources] + urls = [field["urls.direct_download"] for field in raw_sources if download_from_all_countries or field["location.country_code"] in countries_list] write_list_to_file(os.path.join(path, URLS_FILE_MOBILITYDB), urls) @@ -238,6 +243,13 @@ def main(): help="user key for working with transitland API v2" ) + # Example: to download data only for Germany and France use "--mdb_countries DE,FR" + parser.add_argument( + "-c", + "--mdb_countries", + help="use data from MobilityDatabase only from selected countries (use ISO codes)", + ) + args = parser.parse_args() logging.basicConfig( @@ -249,7 +261,11 @@ def main(): if args.mode in ["fullrun", "load_feed_urls"]: if args.source in ["all", "mobilitydb"]: - get_gtfs_urls_mobilitydb(args.path) + mdb_countries = [] + if args.mdb_countries: + mdb_countries = args.mdb_countries.split(',') + + get_gtfs_urls_mobilitydb(args.path, mdb_countries) if args.source in ["all", "transitland"]: if not args.transitland_api_key: logger.error(