From 30ec02004a2d8dcddfde6fff1e7605d36471549e Mon Sep 17 00:00:00 2001 From: Denis Koronchik Date: Mon, 23 Jun 2014 17:21:05 +0300 Subject: [PATCH] Add python script to calculate and visualize formula to calculate city radius based on it population --- tools/python/city_popul_sqr.data | 82 ++++++++++++++++ tools/python/city_radius.py | 157 +++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 tools/python/city_popul_sqr.data create mode 100644 tools/python/city_radius.py diff --git a/tools/python/city_popul_sqr.data b/tools/python/city_popul_sqr.data new file mode 100644 index 0000000000..848b9bb6a3 --- /dev/null +++ b/tools/python/city_popul_sqr.data @@ -0,0 +1,82 @@ +# City* Population*Region* | Width km* | Height km* | Square km2* | AVG | MAX | SQRT(S) +# Columns with * symbol are required +New York | 8405837 | USA | 56.5 | 50.2 | 1214 | 53.35 | 56.5 | 34.84250278 +Los Angeles | 3792621 | USA | 51 | 80.5 | 1301.97 | 65.75 | 80.5 | 36.0828214 +Chicago | 2714856 | USA | 20.9 | 44.6 | 606.1 | 32.75 | 44.6 | 24.61909828 +Philadelphia | 1447395 | USA | 30.3 | 41.8 | 369.9 | 36.05 | 41.8 | 19.23278451 +Dallas | 1223229 | USA | 38.6 | 50.7 | 997.1 | 44.65 | 50.7 | 31.57689028 +San Francisco | 815358 | USA | 12 | 12.7 | 600.6 | 12.35 | 12.7 | 24.50714182 +Detroit | 701475 | USA | 33.5 | 31.9 | 370.2 | 32.7 | 33.5 | 19.24058211 +Memphis | 677272 | USA | 38 | 32.2 | 763.4 | 35.1 | 38 | 27.62969417 +Seattle | 634535 | USA | 11.9 | 27.4 | 369.2 | 19.65 | 27.4 | 19.2145778 +Boston | 625087 | USA | 15.1 | 18.2 | 232.1 | 16.65 | 18.2 | 15.23482852 +Las Vegas | 596424 | USA | 43.1 | 31.9 | 340 | 37.5 | 43.1 | 18.43908891 +Atlanta | 443775 | USA | 26.6 | 29.3 | 343 | 27.95 | 29.3 | 18.52025918 +Miami | 433146 | USA | 13.7 | 15.8 | 92.42 | 14.75 | 15.8 | 9.613532129 +New Orleans | 343829 | USA | 46 | 15.1 | 907 | 30.55 | 46 | 30.11644069 +Pittsburgh | 312819 | USA | 23.3 | 26.9 | 151.1 | 25.1 | 26.9 | 12.292274 +Orlando | 238300 | USA | 24.8 | 22.2 
| 286.7 | 23.5 | 24.8 | 16.93221781 +Des Moines | 203433 | USA | 18.2 | 17.2 | 213.9 | 17.7 | 18.2 | 14.62532051 +Salt Lake City | 181698 | USA | 27.8 | 12.1 | 285.9 | 19.95 | 27.8 | 16.9085777 +Aurora | 7508 | USA | 5 | 4.8 | 15.49 | 4.9 | 5 | 3.935733731 +Barthsville | 36245 | USA | 9.3 | 11.9 | 58.9 | 10.6 | 11.9 | 7.674633542 +Farmington | 45854 | USA | 10 | 12.9 | 69.93 | 11.45 | 12.9 | 8.362415919 +Dumas | 14989 | USA | 3.2 | 8.2 | 14.29 | 5.7 | 8.2 | 3.780211634 +Grand Island | 49989 | USA | 10.1 | 8.4 | 73.94 | 9.25 | 10.1 | 8.598837131 +Hastings | 25058 | USA | 8.9 | 6.3 | 35.38 | 7.6 | 8.9 | 5.948108943 +Decatur | 9362 | USA | 7.4 | 4.8 | 15 | 6.1 | 7.4 | 3.872983346 +Van Wert | 10844 | USA | 4.5 | 5.3 | 19.71 | 4.9 | 5.3 | 4.439594576 +Ottawa | 4417 | USA | 8 | 4.7 | 12.35 | 6.35 | 8 | 3.514256678 +Blythe | 20590 | USA | 9.5 | 22.9 | 69.86 | 16.2 | 22.9 | 8.358229478 +Morgan Hill | 39420 | USA | 11.3 | 13.9 | 33.36 | 12.6 | 13.9 | 5.775811631 +London | 8308369 | Europe | 58.9 | 49.9 | 1572 | 54.4 | 58.9 | 39.6484552 +Athens | 3074160 | Europe | 9.9 | 10.7 | 412 | 10.3 | 10.7 | 20.29778313 +Berlin | 3397469 | Europe | 40.7 | 37.8 | 891.85 | 39.25 | 40.7 | 29.86385775 +Madrid | 3215633 | Europe | 32 | 25.1 | 605.77 | 28.55 | 32 | 24.61239525 +Rome | 2863322 | Europe | 28.2 | 31.9 | 1285.31 | 30.05 | 31.9 | 35.85122034 +Paris | 10413386 | Europe | 10.9 | 10.6 | 2844.80 | 10.75 | 10.9 | 53.33666656 +Bucharest | 1883425 | Europe | 26.1 | 27 | 228 | 26.55 | 27 | 15.09966887 +Hamburg | 1751775 | Europe | 29.3 | 33.4 | 755 | 31.35 | 33.4 | 27.47726333 +Vienna | 1765649 | Europe | 30.1 | 15.2 | 414.65 | 22.65 | 30.1 | 20.36295656 +Warsaw | 1715517 | Europe | 28.1 | 31.2 | 517.24 | 29.65 | 31.2 | 22.74291098 +Barselona | 1620943 | Europe | 17.7 | 18.5 | 101.9 | 18.1 | 18.5 | 10.09455299 +Munich | 1388308 | Europe | 30.2 | 20.3 | 310.43 | 25.25 | 30.2 | 17.61902381 +Milan | 1353882 | Europe | 14.5 | 26 | 181.76 | 20.25 | 26 | 13.48183964 +Prague | 1243201 | Europe 
| 34 | 24.5 | 496 | 29.25 | 34 | 22.27105745 +Brussels | 1138854 | Europe | 16.1 | 16.1 | 161.38 | 16.1 | 16.1 | 12.70354281 +Birmingham | 1085400 | Europe | 14.8 | 14.8 | 103.39 | 14.8 | 14.8 | 10.16808733 +Rennes | 208033 | Europe | 8 | 7.5 | 50.39 | 7.75 | 8 | 7.09859141 +Lisboa | 547631 | Europe | 8.4 | 7.8 | 958 | 8.1 | 8.4 | 30.95157508 +Sevilla | 703021 | Europe | 18.4 | 12.4 | 140 | 15.4 | 18.4 | 11.83215957 +Malaga | 568507 | Europe | 14 | 9.4 | 395 | 11.7 | 14 | 19.87460691 +Genova | 604848 | Europe | 29.9 | 9.5 | 243.6 | 19.7 | 29.9 | 15.60769041 +Parma | 187214 | Europe | 8.9 | 7.9 | 260.77 | 8.4 | 8.9 | 16.14837453 +Bologna | 384038 | Europe | 16.5 | 13.8 | 140.7 | 15.15 | 16.5 | 11.86170308 +Erlangen | 105412 | Europe | 11.6 | 12.4 | 76.9 | 12 | 12.4 | 8.769264507 +Kaltenkirchen | 19904 | Europe | 8.7 | 5.3 | 23.1 | 7 | 8.7 | 4.806245936 +Lier | 33492 | Europe | 9.5 | 13 | 49.7 | 11.25 | 13 | 7.049822693 +Decin | 50311 | Europe | 15.6 | 21.1 | 140 | 18.35 | 21.1 | 11.83215957 +Marianske Lazne | 14083 | Europe | 9.6 | 17.9 | 51.81 | 13.75 | 17.9 | 7.197916365 +Bindlach | 7211 | Europe | 9.6 | 8 | 37.6 | 8.8 | 9.6 | 6.131883887 +Moscow | 11503501 | Russia | 32.2 | 46.8 | 2511 | | | +Saint Peterburg | 4879566 | Russia | 18.6 | 36.1 | 1439 | | | +Novosibirsk | 1473754 | Russia | 22.6 | 43.8 | 502.1 | | | +Yekaterinburg | 1349772 | Russia | 24.4 | 33 | 495 | | | +Nizhny Novgorod | 1250619 | Russia | 24.1 | 30.4 | 410.68 | | | +Samara | 1164685 | Russia | 20.1 | 44.7 | 541.382 | | | +Kazan | 1143535 | Russia | 34.3 | 29.3 | 425.3 | | | +Rostov-on-Don | 1089261 | Russia | 59.2 | 26.4 | 348.5 | | | +Volgograd | 1021215 | Russia | 12.4 | 49.2 | 859.353 | | | +Krasnoyarsk | 1035528 | Russia | 36.2 | 23.8 | 348 | | | +Saratov | 839755 | Russia | 25.1 | 35.2 | 394 | | | +Krasnodar | 805680 | Russia | 26.2 | 20.7 | 192.19 | | | +Tolyatti | 718127 | Russia | 31 | 15 | 314.78 | | | +Izhevsk | 637309 | Russia | 32.7 | 21.3 | 315.15 | | | +Vladivostok | 600378 | 
Russia | 16.4 | 39.6 | 331.16 | | |
+Orenburg | 560046 | Russia | 13.6 | 36.3 | 259 | | |
+Tula | 490508 | Russia | 18.2 | 21.8 | 145.8 | | |
+Cheboksary | 479266 | Russia | 16.7 | 13.9 | 250.87 | | |
+Tver | 411044 | Russia | 20.3 | 27.8 | 152.22 | | |
+Arkhangelsk | 350985 | Russia | 13.8 | 35.7 | 294.42 | | |
+Vologda | 306487 | Russia | 18.1 | 11.1 | 116 | | |
+Nizhnevartovsk | 265994 | Russia | 20.1 | 13.7 | 71 | | |
diff --git a/tools/python/city_radius.py b/tools/python/city_radius.py
new file mode 100644
index 0000000000..274149da56
--- /dev/null
+++ b/tools/python/city_radius.py
@@ -0,0 +1,181 @@
+"""Fit and plot a formula that estimates a city radius from its population.
+
+The data file is a '|'-separated table; lines starting with '#' are comments.
+Run with --help to list the options. Works with Python 2 and Python 3.
+"""
+
+import math
+import os
+import sys
+from optparse import OptionParser
+
+import matplotlib.pyplot as plt
+
+
+# Rows collected by load_data(). Each row is a dict with the keys
+# 'name', 'population', 'region', 'width', 'height' and 'square'.
+cities = []
+
+
+def strip(s):
+    """Return s without leading and trailing tabs, newlines and spaces."""
+    return s.strip('\t\n ')
+
+
+def load_data(path):
+    """Parse the table at path and append its rows to the global cities list.
+
+    Lines starting with '#' and lines with fewer than six '|'-separated
+    columns are skipped; columns after the sixth are ignored.
+    """
+    # 'with' closes the file even when a malformed number raises ValueError.
+    with open(path, 'r') as f:
+        for line in f:
+            if line.startswith('#'):
+                continue
+
+            data = line.split('|')
+            if len(data) < 6:
+                continue
+
+            cities.append({
+                'name': strip(data[0]),
+                'population': int(strip(data[1])),
+                'region': strip(data[2]),
+                'width': float(strip(data[3])),
+                'height': float(strip(data[4])),
+                'square': float(strip(data[5])),
+            })
+
+    print("Cities count: %d" % len(cities))
+
+
+def formula(popul, base=32, mult=0.5):
+    """Return the estimated size for popul: exp(log(popul, base)) * mult."""
+    return math.exp(math.log(popul, base)) * mult
+
+
+def avgDistance(approx, data):
+    """Return the mean absolute difference between approx and data.
+
+    Both sequences are expected to have the same, non-zero length.
+    """
+    total = sum(math.fabs(a - d) for a, d in zip(approx, data))
+    return total / float(len(data))
+
+
+def _grid(start, stop, step):
+    """Return [start, start + step, ...] up to and including stop."""
+    if step <= 0:
+        raise ValueError("step must be positive, got %r" % (step,))
+    # Points are derived from an integer index instead of repeatedly adding
+    # step: accumulated rounding error could otherwise push the last point
+    # just past stop and silently drop it. The small tolerance keeps stop
+    # on the grid when the quotient lands a hair below an integer.
+    count = int(math.floor((stop - start) / float(step) + 1e-9))
+    return [start + i * step for i in range(count + 1)]
+
+
+def findBest(popul, data, minBase=5, maxBase=100, stepBase=0.1,
+             minMult=0.01, maxMult=1, stepMult=0.01):
+    """Grid-search the base and mult that minimise avgDistance to data.
+
+    popul holds the populations and data the measured sizes, in the same
+    order. Returns the tuple (bestBase, bestMult).
+    """
+    bases = _grid(minBase, maxBase, stepBase)
+    mults = _grid(minMult, maxMult, stepMult)
+
+    minDist = -1  # negative means "nothing evaluated yet"
+    bestMult = minMult
+    bestBase = minBase
+
+    for i, base in enumerate(bases):
+        print("%.02f%% best mult: %f, best base: %f, best dist: %f" % (
+            100.0 * i / max(len(bases) - 1, 1), bestMult, bestBase, minDist))
+
+        # exp(log(p, base)) does not depend on mult, so it is computed once
+        # per base instead of once per (base, mult) pair.
+        unscaled = [formula(p, base, 1) for p in popul]
+
+        for mult in mults:
+            dist = avgDistance([u * mult for u in unscaled], data)
+            if minDist < 0 or dist < minDist:
+                minDist = dist
+                bestBase = base
+                bestMult = mult
+
+    return (bestBase, bestMult)
+
+
+def process_data(steps_count, base, mult, bestFind=False, dataFlag=0):
+    """Plot the loaded cities together with the formula curve.
+
+    steps_count is the number of population samples used to draw the curve.
+    base and mult are the formula parameters; when bestFind is true they are
+    replaced by the result of findBest(). dataFlag selects the data the scan
+    fits against: 0 - max(width, height), 1 - their average, 2 - sqrt(square).
+    """
+    if not cities:
+        # Without this guard the code below fails with an obscure math error.
+        print("No city data loaded, nothing to plot")
+        return
+
+    population = [city['population'] for city in cities]
+    maxData = [max(city['width'], city['height']) for city in cities]
+    avgData = [(city['width'] + city['height']) * 0.5 for city in cities]
+    sqrData = [math.sqrt(city['square']) for city in cities]
+    minPopulation = min(population)
+    maxPopulation = max(population)
+
+    bestBase = base
+    bestMult = mult
+    if bestFind:
+        # Any dataFlag other than 1 or 2 falls back to the max dataset.
+        target = {1: avgData, 2: sqrData}.get(dataFlag, maxData)
+        bestBase, bestMult = findBest(population, target)
+        print("Finished\n\nBest mult: %f, Best base: %f" % (bestMult, bestBase))
+
+    step = (maxPopulation - minPopulation) / float(steps_count)
+    population2 = [minPopulation + i * step for i in range(steps_count)]
+    approx = [formula(v, bestBase, bestMult) for v in population2]
+
+    plt.plot(population, avgData, 'bo', population, maxData, 'ro',
+             population, sqrData, 'go', population2, approx, 'y')
+    plt.axis([minPopulation, maxPopulation, 0, 100])
+    plt.xscale('log')
+    plt.show()
+
+
+def main():
+    """Parse the command line, load the data file and show the plot."""
+    parser = OptionParser(usage="%prog [options]")
+    parser.add_option("-f", "--file", dest="filename",
+                      default="city_popul_sqr.data",
+                      help="source data file", metavar="path")
+    parser.add_option("-s", "--scan",
+                      dest="best", default=False, action="store_true",
+                      help="scan best values of mult and base")
+    # type= lets optparse report bad numbers as usage errors instead of
+    # letting float()/int() raise a bare ValueError traceback later.
+    parser.add_option('-m', "--mult",
+                      dest='mult', type="float", default=1.0,
+                      help='multiplier value')
+    parser.add_option('-b', '--base',
+                      dest='base', type="float", default=35.0,
+                      help="base value")
+    parser.add_option('-d', '--data',
+                      dest='data', type="int", default=0,
+                      help="Dataset to use on best values scan: "
+                           "0 - max, 1 - avg, 2 - sqr")
+
+    (options, args) = parser.parse_args()
+    if not os.path.isfile(options.filename):
+        parser.error("data file not found: %s" % options.filename)
+
+    load_data(options.filename)
+    process_data(1000, options.base, options.mult, options.best, options.data)
+
+
+if __name__ == "__main__":
+    sys.exit(main())