#!/usr/bin/env python3
# coding: utf-8
"""Aggregate download-log records into per-month unique-user counts.

Input (stdin): one '|'-separated record per line, read positionally as

    from_country|<ignored>|dd/Mon/YYYY:HH:MM:SS|user_id|req_country[_...][|ext]

Output (stdout): one ';'-separated line per combination of
year, month, from_country, req_country and request type:

    year;MM;from_country;req_country;type;unique_users

``type`` is 'R' when the record has exactly six fields and the sixth is
'.routing'; otherwise it is 'M'.  Records that cannot be parsed are skipped.

The code deliberately avoids the ``print`` statement and ``iterkeys()`` so
that it runs under the python3 interpreter named in the shebang as well as
under Python 2.7.
"""

from collections import defaultdict
import datetime
import sys

# Timestamp layout of the third field.  NOTE: '%b' is matched against the
# current locale's abbreviated month names.
DATE_FORMAT = '%d/%b/%Y:%H:%M:%S'

# result[date_key][from_country][req_country] -> set of user keys, where
# date_key is '<year>_<month>_<type>' and a user key is
# '<user_id>_<req_country>'.
result = defaultdict(lambda: defaultdict(lambda: defaultdict(set)))


def add_record(rec):
    """Parse one input line and register its user in ``result``.

    Returns True when the record was counted and False when it was skipped
    because it has too few fields or an unparsable timestamp.  ``result`` is
    only touched after the whole line parsed successfully.
    """
    parts = rec.strip().split('|')
    try:
        req_type = 'R' if len(parts) == 6 and parts[5] == '.routing' else 'M'
        from_country = parts[0]
        date = datetime.datetime.strptime(parts[2], DATE_FORMAT)
        user_id = parts[3]
        # Only the part before the first '_' identifies the country.
        req_country = parts[4].split('_')[0]
    except (IndexError, ValueError):
        # Best effort: one malformed line must not abort the whole run.
        # Unlike a bare ``except``, this lets KeyboardInterrupt through.
        return False

    date_key = '{}_{}_{}'.format(date.year, date.month, req_type)
    user_key = '{}_{}'.format(user_id, req_country)
    result[date_key][from_country][req_country].add(user_key)
    return True


def print_result(out=None):
    """Write the aggregated counts, one ';'-separated line per group.

    ``out`` is any object with a ``write`` method; it defaults to
    ``sys.stdout``.
    """
    if out is None:
        out = sys.stdout
    for date_key, by_from_country in result.items():
        year, month, req_type = date_key.split('_')
        for from_country, by_req_country in by_from_country.items():
            for req_country, users in by_req_country.items():
                out.write('{};{:02d};{};{};{};{}\n'.format(
                    year, int(month), from_country, req_country,
                    req_type, len(users)))


def main():
    """Aggregate stdin and always print whatever was collected.

    Ctrl-C ends the run with the partial result and exit status 0; any
    other error prints the partial result and then propagates.
    """
    try:
        for rec in sys.stdin:
            add_record(rec)
    except KeyboardInterrupt:
        pass
    finally:
        print_result()


if __name__ == '__main__':
    main()
-- Download statistics report.
-- Loads the ';'-separated file /tmp/aggregated_download_info.csv into the
-- `downloads` table, cleans the data up and prints several rankings.
-- Statement order matters: the row clean-up below runs BEFORE country names
-- are normalised.

-- One row per (year, month, country_from, country_to, type) with a count.
-- NOTE(review): the column order looks like it matches the
-- year;month;from;to;type;count lines written by aggregator.py - confirm.
drop table if exists downloads;
create table downloads(year int, month int, country_from varchar(255), country_to varchar(255), type char(1), count int);

load data infile '/tmp/aggregated_download_info.csv' into table downloads fields terminated by ';' lines terminated by '\n';

-- Drop every destination country whose total count over all rows is < 1000.
-- NOTE(review): the extra derived table `z` is presumably there because the
-- subquery reads the table that is being deleted from - confirm.
delete from downloads where country_to in (select country_to from (select country_to, SUM(count) as cnt from downloads group by country_to having cnt < 1000) as z);

-- Drop rows whose source country is the literal 'None' or 'Unknown'.
delete from downloads where country_from in ('None', 'Unknown');

-- Rename source-country values. Only country_from is rewritten; country_to
-- is left as loaded.
-- NOTE(review): presumably this aligns country_from with the names used in
-- country_to - confirm.
update downloads set country_from='USA' where country_from='United States';
update downloads set country_from='UK' where country_from='United Kingdom';
update downloads set country_from='Moldova' where country_from='Republic of Moldova';
update downloads set country_from='Lithuania' where country_from='Republic of Lithuania';
update downloads set country_from='Burma' where country_from='Myanmar [Burma]';
update downloads set country_from='China' where country_from='Hong Kong';
update downloads set country_from='China' where country_from='Macao';
update downloads set country_from='South Korea' where country_from='Republic of Korea';
update downloads set country_from='Jordan' where country_from='Hashemite Kingdom of Jordan';
update downloads set country_from='Italy' where country_from='San Marino';
update downloads set country_from='Micronesia' where country_from='Federated States of Micronesia';
update downloads set country_from='United States Virgin Islands' where country_from='U.S. Virgin Islands';
update downloads set country_from='Congo-Kinshasa' where country_from='Congo';
update downloads set country_from='Congo-Brazzaville' where country_from='Republic of the Congo';
update downloads set country_from='Saint Martin' where country_from='Sint Maarten';
update downloads set country_from='Netherlands Antilles' where country_from='Bonaire';

-- Sanity reports: names that occur on only one side of the from/to relation.
select distinct country_from as country_from_that_never_to from downloads where country_from not in (select country_to from downloads);
select distinct country_to as country_to_that_never_from from downloads where country_to not in (select country_from from downloads);

-- Top-20 rankings, restricted to rows with type='M'.
select country_to as most_downloaded_country, SUM(count) as cnt from downloads where type='M' group by country_to order by cnt desc limit 20;
select country_to as most_downloaded_country_from_outside, SUM(count) as cnt from downloads where type='M' and country_from <> country_to group by country_to order by cnt desc limit 20;
select country_from as most_downloading_country, SUM(count) as cnt from downloads where type='M' group by country_from order by cnt desc limit 20;


-- Per source country (type='M' only): in_cnt sums rows where source and
-- destination are equal, out_cnt sums rows where they differ, and ratio is
-- out_cnt / in_cnt. The two halves are combined with `union` and summed again
-- per country.
-- NOTE(review): the trailing backslashes are kept exactly as in the original
-- script; confirm that the client used to run this file accepts them.
select country_from, in_cnt, out_cnt, out_cnt * 1.0 / in_cnt as ratio from \
  (select country_from, SUM(in_cnt) as in_cnt, SUM(out_cnt) as out_cnt from ( \
    select country_from, SUM(count) as in_cnt, 0 as out_cnt from downloads where type='M' and country_from=country_to group by country_from \
    union select country_from, 0 as in_cnt, SUM(count) as out_cnt from downloads where type='M' and country_from<>country_to group by country_from \
  ) as z group by country_from \
) as zz order by ratio desc;
#!/usr/bin/env python3
# coding: utf-8
"""Prefix every '|'-separated stdin record with the country of its IP.

The first field of each record is looked up in ./GeoLite2-Country.mmdb
(requires the third-party ``geoip2`` package) and the record is echoed to
stdout as

    country_name|<original fields joined by '|'>

Addresses missing from the database are labelled 'Unknown'.  Records that
fail for any other reason (for example an unparsable address) are skipped.

The code avoids the ``print`` statement so that it runs under the python3
interpreter named in the shebang as well as under Python 2.7.
"""

import sys

GEOIP_DB_PATH = './GeoLite2-Country.mmdb'
UNKNOWN_COUNTRY = 'Unknown'


def resolve_line(rec, reader, not_found_error):
    """Return ``rec`` (stripped) prefixed with its source country name.

    ``reader`` must provide ``country(ip)`` returning an object whose
    ``country.name`` is the country name; ``not_found_error`` is the
    exception type (or tuple of types) that ``reader`` raises for an address
    it does not know, in which case 'Unknown' is used.  Any other exception
    propagates to the caller.
    """
    parts = rec.strip().split('|')
    try:
        from_country = reader.country(parts[0]).country.name
    except not_found_error:
        from_country = UNKNOWN_COUNTRY
    return '{}|{}'.format(from_country, '|'.join(parts))


def resolve_stream(lines, reader, not_found_error, out):
    """Resolve every record in ``lines`` and write the results to ``out``.

    A record that cannot be resolved is dropped so that one bad line does
    not abort the run.  ``Exception`` (rather than a bare ``except``) is
    caught so that Ctrl-C still reaches the caller; errors raised while
    writing to ``out`` are not suppressed.
    """
    for rec in lines:
        try:
            resolved = resolve_line(rec, reader, not_found_error)
        except Exception:
            continue
        out.write(resolved + '\n')


def main():
    """Resolve stdin to stdout; Ctrl-C exits quietly with status 0."""
    # Imported here so the helpers above stay usable without geoip2.
    # geoip2.errors is imported explicitly instead of relying on
    # geoip2.database pulling it in.
    import geoip2.database
    import geoip2.errors

    reader = geoip2.database.Reader(GEOIP_DB_PATH)
    try:
        resolve_stream(sys.stdin, reader,
                       geoip2.errors.AddressNotFoundError, sys.stdout)
    except KeyboardInterrupt:
        sys.exit(0)
    finally:
        reader.close()


if __name__ == '__main__':
    main()