This repository has been archived on 2025-03-22. You can view files and clone it, but cannot push or open issues or pull requests.
travelguide/geocoder/geocode-1.py
2014-05-08 22:28:42 +07:00

134 lines
4.3 KiB
Python

# -*- coding: utf-8 -*-
import sys
import os
import shutil
import unicodedata
import urllib
import pprint
import json
import hashlib
try:
from bs4 import BeautifulSoup
except ImportError:
from BeautifulSoup import BeautifulSoup
# Force the interpreter-wide default string encoding to UTF-8.
# NOTE(review): reload(sys) is presumably what makes setdefaultencoding
# reachable again on Python 2 -- confirm before porting this script.
reload(sys)
sys.setdefaultencoding('utf-8')

# Address queries for the city pages this script knows about.  The selected
# entry is geocoded to obtain the city's bounding box, and its
# "countrycodes" value is reused for every per-place lookup.
CITY_ADDRESSES = {
    "Zurich": {
        "countrycodes": "CH",
        "country": "Switzerland",
        "city": "Zurich",
    },
    "London": {
        "countrycodes": "GB",
        "country": "United Kingdom",
        "city": "London",
    },
    "Murmansk": {
        "countrycodes": "RU",
        "country": "Russia",
        "city": "Murmansk",
    },
    "Tokyo": {
        "countrycodes": "JP",
        "country": "Japan",
        "city": "Tokyo",
    },
}

# Exactly one page is processed per run.  Selecting it here (instead of
# assigning page_address/soup once per city in sequence) avoids opening and
# parsing HTML files whose result is immediately discarded.
ACTIVE_CITY = "Tokyo"

# Copy so later code can never alter the table entry by accident.
page_address = dict(CITY_ADDRESSES[ACTIVE_CITY])

# The saved page is expected as "<city>.html" in the working directory.
with open(page_address["city"] + ".html") as page_file:
    soup = BeautifulSoup(page_file)
def nominatim_geocode(address):
    """Query the Nominatim search endpoint for *address*, with a disk cache.

    The raw response body is stored in the ``nominatim`` directory (created
    on demand) under the MD5 of the JSON-serialised query, so a repeated
    query is answered from disk without a network request.

    address -- dict of query parameters; it is URL-encoded into the request.

    Returns the decoded JSON response, or "" when the body is not valid
    JSON.  NOTE: the body is cached before it is parsed, so an unparsable
    response stays cached until its file is removed.
    """
    cache_dir = "nominatim"
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)

    # sort_keys makes the cache key independent of the dict's key order.
    serialised = json.dumps(address, sort_keys=True)
    key = os.path.join(cache_dir, hashlib.md5(serialised.encode("utf-8")).hexdigest())

    if os.path.exists(key):
        with open(key) as cached:
            val = cached.read()
    else:
        # The "&" before the encoded parameters is required: without it the
        # first parameter is glued onto the accept-language value and lost.
        url = ("http://open.mapquestapi.com/nominatim/v1/search.php"
               "?format=json&accept-language=en&" + urllib.urlencode(address))
        response = urllib.urlopen(url)
        try:
            val = response.read()
        finally:
            response.close()
        with open(key, "w") as cached:
            cached.write(val)

    try:
        return json.loads(val)
    except ValueError:
        return ""
def nominatim_bbox_transform(bbox):
    """Reorder a four-item bounding box and convert every item to float.

    The result is ``[bbox[2], bbox[0], bbox[3], bbox[1]]``.  Callers in this
    file compare items 0 and 2 of the result against longitudes and items 1
    and 3 against latitudes.
    NOTE(review): the input is presumably Nominatim's "boundingbox" value
    (min lat, max lat, min lon, max lon) -- confirm against the API docs.
    """
    source_positions = (2, 0, 3, 1)
    return [float(bbox[position]) for position in source_positions]
def _place_in_bbox(place, bbox):
    """Return True when the place's lon/lat lies inside *bbox*, edges included.

    bbox is indexed as [min lon, min lat, max lon, max lat].
    """
    lon = float(place["lon"])
    lat = float(place["lat"])
    return bbox[0] <= lon <= bbox[2] and bbox[1] <= lat <= bbox[3]


def geocode_bbox(string, city_bbox, place_params):
    """Geocode *string* and return the first result inside *city_bbox*.

    string       -- free-text query, sent as the "q" parameter.
    city_bbox    -- [min lon, min lat, max lon, max lat] the result must
                    fall into.
    place_params -- base query parameters; copied, never modified.

    Returns a ``(lon, lat)`` tuple of floats for the first acceptable
    result, or False when no result qualifies.
    """
    # Build the query on a copy so the caller's dict is left untouched.
    query = dict(place_params, q=string)

    # Text containing a digit is treated as a house address; for those,
    # results of class "highway" are skipped (presumably so that a house
    # number does not resolve to the street as a whole).
    is_house = any(char.isdigit() for char in str(string))

    for place in nominatim_geocode(query):
        if is_house and place["class"] == "highway":
            continue
        if _place_in_bbox(place, city_bbox):
            # Progress output: the accepted result.
            print(place)
            return (float(place["lon"]), float(place["lat"]))
    return False
from xml.sax.saxutils import quoteattr

# Geocode the city itself first; its bounding box both biases the per-place
# queries ("viewbox") and is used to reject results outside the city.
city_matches = nominatim_geocode(page_address)
if not city_matches:
    sys.exit("Nominatim returned no result for %r" % (page_address,))
city_geocode = city_matches[0]
city_bbox = nominatim_bbox_transform(city_geocode["boundingbox"])
place_params = {
    "viewbox": ",".join([str(x) for x in city_bbox]),
    "countrycodes": page_address["countrycodes"],
}

# NOTE: nothing in this script adds to this set, so the OSM export further
# down currently writes an <osm> element with no nodes.
texts_to_geocode = set()

# Annotate every <b> element and every address label that can be geocoded
# with a "[map]" link pointing at the resolved coordinates.
for s in soup.findAll("b") + soup.findAll("span", "label listing-address"):
    label = s.getText()
    coord = geocode_bbox(label, city_bbox, place_params)
    if not coord:
        print("%s BAD!!!" % label)
        continue
    print("%s %s" % (label, coord))
    lon, lat = coord
    # Only the openstreetmap.org link is emitted; a "mapswithme://" link
    # used to be built here as well but was overwritten before use.
    href = "http://www.openstreetmap.org/?mlat=%s&mlon=%s#map=19/%s/%s" % (lat, lon, lat, lon)
    map_tag = soup.new_tag("a", href=href)
    map_tag["class"] = "geolink"
    map_tag.string = "[map]"
    s.append(map_tag)

with open('out.html', 'w') as out_file:
    out_file.write(str(soup))

# Export one OSM node per successfully geocoded text.
with open("file.osm", "w") as osm:
    osm.write('<osm version="0.6">\n')
    node_id = 0
    bad_cnt = 0
    for text in texts_to_geocode:
        node_id += 1
        place_params["q"] = text
        match = None
        for place in nominatim_geocode(place_params):
            print(place)
            print(city_bbox)
            inside_city = (city_bbox[0] <= float(place["lon"]) <= city_bbox[2]
                           and city_bbox[1] <= float(place["lat"]) <= city_bbox[3])
            if inside_city:
                match = place
                break
        if match is None:
            bad_cnt += 1
            print("%s %s" % (bad_cnt, text))
            continue
        # quoteattr supplies the surrounding quotes and escapes the value,
        # so quotes or "&" in scraped text cannot break the XML.
        osm.write('<node id="%s" lat="%s" lon="%s" version="1"><tag k="name" v=%s /></node>\n'
                  % (node_id, match["lat"], match["lon"], quoteattr(text)))
    osm.write('</osm>\n')