Finish note processing

This commit is contained in:
Ilya Zverev 2016-06-14 16:08:13 +03:00
parent 3afa3cc132
commit d606b4e099

View file

@ -1,12 +1,19 @@
#!/usr/bin/env python
import bz2, urllib2, json
import urllib2
import json
from tempfile import TemporaryFile
from bz2file import BZ2File
from db import *
from db import database, Change
from lxml import etree
from datetime import datetime
def hour_difference(start, timestamp):
    """Return the number of whole hours elapsed from *start* to *timestamp*.

    start     -- a datetime object marking the beginning of the interval.
                 It must be subtractable from a naive datetime (presumably
                 a naive UTC value -- confirm against the callers).
    timestamp -- a string in the form 'YYYY-MM-DDTHH:MM:SSZ'.

    The result is an integer, rounded towards negative infinity, so a
    timestamp that precedes *start* yields a negative number of hours.
    Raises ValueError when *timestamp* does not match the expected format.
    """
    last_ts = datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%SZ')
    diff = last_ts - start
    # Floor division keeps the result an integer number of hours even when
    # true division is in effect (Python 3 or `from __future__ import division`).
    return diff.days * 24 + diff.seconds // 3600
def process_notes():
response = urllib2.urlopen('http://planet.openstreetmap.org/notes/planet-notes-latest.osn.bz2')
# Parsing bz2 through a temporary file
@ -23,24 +30,26 @@ def process_notes():
with BZ2File(tmpfile) as f:
for event, element in etree.iterparse(f):
if element.tag == 'note':
if len(element) > 0 and '#mapsme' in element[0].text:
if len(element) > 0 and element[0].text and '#mapsme' in element[0].text:
note_id = element.get('id')
try:
ch = Change.get(Change.changeset == note_id, Change.action == 'n')
if element[-1].get('action') == 'closed':
# TODO: mark as closed
last_ts = element[-1].get('timestamp')
if element[-1].get('action') == 'closed' and ch.processed is None:
print('Found closed note {0}'.format(note_id))
ch.processed = hour_difference(ch.timestamp, element[-1].get('timestamp'))
ch.save()
except Change.DoesNotExist:
print 'Found new note', note_id, 'by', element[0].get('user')
# No such note, add it
ch = Change()
ch.changeset = note_id
ch.user = element[0].get('user') if element[0].get('uid') else 'Anonymous'
ch.version = ''
ch.timestamp = element[0].get('timestamp')
ch.action = 'n'
changes = [(element.get('lon'), element.get('lat')), {'name': element[0].text}]
ch.changes = json.dumps(changes)
ch.version = ''
ch.changeset = note_id
ch.user = element[0].get('user') if element[0].get('uid') else 'Anonymous Note'
print('Found new note {0} by {1}'.format(note_id, ch.user.encode('utf-8')))
ch.timestamp = datetime.strptime(element[0].get('timestamp'), '%Y-%m-%dT%H:%M:%SZ')
if element[-1].get('action') == 'closed' and ch.processed is None:
ch.processed = hour_difference(ch.timestamp, element[-1].get('timestamp'))
changes = [(element.get('lon'), element.get('lat')), {'note': element[0].text}]
ch.changes = json.dumps(changes, ensure_ascii=False)
ch.save()
element.clear()