From f86dc70522da01fa20f721b8dbe9e473cf418fab Mon Sep 17 00:00:00 2001 From: Maciej Sikorski Date: Fri, 26 Aug 2022 19:04:17 +0200 Subject: [PATCH 1/4] [transit]Added argument to download mdb data only for desired countries Optional argument to only download GTFS data from selected countries. Might help in avoiding dealing with large datasets (~35GB). Works only for MobilityDatabase as its .csv file contains country code for each item. Signed-off-by: Maciej Sikorski --- tools/python/transit/gtfs/download_gtfs.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/python/transit/gtfs/download_gtfs.py b/tools/python/transit/gtfs/download_gtfs.py index 3e54c24f53..3de15dfa3d 100644 --- a/tools/python/transit/gtfs/download_gtfs.py +++ b/tools/python/transit/gtfs/download_gtfs.py @@ -45,13 +45,17 @@ def download_gtfs_sources_mobilitydb(path): f"HTTP error {http_err} downloading zip from {URL_MOBILITYDB_GTFS_SOURCE}") -def get_gtfs_urls_mobilitydb(path): +def get_gtfs_urls_mobilitydb(path, countries_list): """Extracts the feed urls from the downloaded csv file""" + download_from_all_countries = False + if not countries_list: + download_from_all_countries = True + download_gtfs_sources_mobilitydb(path) file = open(os.path.join(path, RAW_FILE_MOBILITYDB), encoding='UTF-8') raw_sources = csv.DictReader(file) next(raw_sources) - urls = [field["urls.direct_download"] for field in raw_sources] + urls = [field["urls.direct_download"] for field in raw_sources if field["location.country_code"] in countries_list or download_from_all_countries] write_list_to_file(os.path.join(path, URLS_FILE_MOBILITYDB), urls) @@ -238,6 +242,13 @@ def main(): help="user key for working with transitland API v2" ) + # Example: to download data only for Germany and France use "--mdb_countries DE FR" + parser.add_argument( + "-c", + "--mdb_countries", + help="use data from MobilityDatabase only from selected countries (use ISO codes)", + ) + args = 
parser.parse_args() logging.basicConfig( @@ -249,7 +260,11 @@ def main(): if args.mode in ["fullrun", "load_feed_urls"]: if args.source in ["all", "mobilitydb"]: - get_gtfs_urls_mobilitydb(args.path) + mdb_countries = [] + if args.mdb_countries: + mdb_countries = args.mdb_countries.split() + + get_gtfs_urls_mobilitydb(args.path, mdb_countries) if args.source in ["all", "transitland"]: if not args.transitland_api_key: logger.error( -- 2.45.3 From d4625a94a5d68723084dfd9044709b497eeb06e1 Mon Sep 17 00:00:00 2001 From: Maciej Sikorski Date: Fri, 26 Aug 2022 19:04:17 +0200 Subject: [PATCH 2/4] [transit]Added argument to download mdb data only for desired countries Optional argument to only download GTFS data from selected countries. Might help in avoiding dealing with large datasets (~35GB). Works only for MobilityDatabase as its .csv file contains country code for each item. Signed-off-by: Maciej Sikorski --- tools/python/transit/gtfs/download_gtfs.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tools/python/transit/gtfs/download_gtfs.py b/tools/python/transit/gtfs/download_gtfs.py index 3e54c24f53..3de15dfa3d 100644 --- a/tools/python/transit/gtfs/download_gtfs.py +++ b/tools/python/transit/gtfs/download_gtfs.py @@ -45,13 +45,17 @@ def download_gtfs_sources_mobilitydb(path): f"HTTP error {http_err} downloading zip from {URL_MOBILITYDB_GTFS_SOURCE}") -def get_gtfs_urls_mobilitydb(path): +def get_gtfs_urls_mobilitydb(path, countries_list): """Extracts the feed urls from the downloaded csv file""" + download_from_all_countries = False + if not countries_list: + download_from_all_countries = True + download_gtfs_sources_mobilitydb(path) file = open(os.path.join(path, RAW_FILE_MOBILITYDB), encoding='UTF-8') raw_sources = csv.DictReader(file) next(raw_sources) - urls = [field["urls.direct_download"] for field in raw_sources] + urls = [field["urls.direct_download"] for field in raw_sources if field["location.country_code"] in 
countries_list or download_from_all_countries] write_list_to_file(os.path.join(path, URLS_FILE_MOBILITYDB), urls) @@ -238,6 +242,13 @@ def main(): help="user key for working with transitland API v2" ) + # Example: to download data only for Germany and France use "--mdb_countries DE FR" + parser.add_argument( + "-c", + "--mdb_countries", + help="use data from MobilityDatabase only from selected countries (use ISO codes)", + ) + args = parser.parse_args() logging.basicConfig( @@ -249,7 +260,11 @@ def main(): if args.mode in ["fullrun", "load_feed_urls"]: if args.source in ["all", "mobilitydb"]: - get_gtfs_urls_mobilitydb(args.path) + mdb_countries = [] + if args.mdb_countries: + mdb_countries = args.mdb_countries.split() + + get_gtfs_urls_mobilitydb(args.path, mdb_countries) if args.source in ["all", "transitland"]: if not args.transitland_api_key: logger.error( -- 2.45.3 From 82265aa5d4b100c642b634b9ec35858b84351736 Mon Sep 17 00:00:00 2001 From: Maciej Sikorski Date: Mon, 29 Aug 2022 10:32:15 +0200 Subject: [PATCH 3/4] Update download_gtfs.py Signed-off-by: Maciej Sikorski --- tools/python/transit/gtfs/download_gtfs.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/python/transit/gtfs/download_gtfs.py b/tools/python/transit/gtfs/download_gtfs.py index 3de15dfa3d..cfd24190bd 100644 --- a/tools/python/transit/gtfs/download_gtfs.py +++ b/tools/python/transit/gtfs/download_gtfs.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """Parses GTFS feeds urls: https://transit.land/ - Transitland https://storage.googleapis.com/storage/v1/b/mdb-csv/o/sources.csv?alt=media @@ -47,9 +48,9 @@ def download_gtfs_sources_mobilitydb(path): def get_gtfs_urls_mobilitydb(path, countries_list): """Extracts the feed urls from the downloaded csv file""" - download_from_all_countries = False - if not countries_list: - download_from_all_countries = True + download_from_all_countries = True + if countries_list: + download_from_all_countries = False 
download_gtfs_sources_mobilitydb(path) file = open(os.path.join(path, RAW_FILE_MOBILITYDB), encoding='UTF-8') @@ -242,7 +243,7 @@ def main(): help="user key for working with transitland API v2" ) - # Example: to download data only for Germany and France use "--mdb_countries DE FR" + # Example: to download data only for Germany and France use "--mdb_countries DE,FR" parser.add_argument( "-c", "--mdb_countries", @@ -262,7 +263,7 @@ def main(): if args.source in ["all", "mobilitydb"]: mdb_countries = [] if args.mdb_countries: - mdb_countries = args.mdb_countries.split() + mdb_countries = args.mdb_countries.split(',') get_gtfs_urls_mobilitydb(args.path, mdb_countries) if args.source in ["all", "transitland"]: -- 2.45.3 From 63e9a0489513ff48f0bdd506b3f72e5f04aa984a Mon Sep 17 00:00:00 2001 From: Maciej Sikorski Date: Tue, 30 Aug 2022 11:30:42 +0200 Subject: [PATCH 4/4] Update download_gtfs.py Signed-off-by: Maciej Sikorski --- tools/python/transit/gtfs/download_gtfs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/python/transit/gtfs/download_gtfs.py b/tools/python/transit/gtfs/download_gtfs.py index cfd24190bd..e87eee5593 100644 --- a/tools/python/transit/gtfs/download_gtfs.py +++ b/tools/python/transit/gtfs/download_gtfs.py @@ -56,7 +56,7 @@ def get_gtfs_urls_mobilitydb(path, countries_list): file = open(os.path.join(path, RAW_FILE_MOBILITYDB), encoding='UTF-8') raw_sources = csv.DictReader(file) next(raw_sources) - urls = [field["urls.direct_download"] for field in raw_sources if field["location.country_code"] in countries_list or download_from_all_countries] + urls = [field["urls.direct_download"] for field in raw_sources if download_from_all_countries or field["location.country_code"] in countries_list] write_list_to_file(os.path.join(path, URLS_FILE_MOBILITYDB), urls) -- 2.45.3