#!/usr/bin/python
"""Stream geotagged tweets into the ``tweets`` PostgreSQL table.

Connects to the filtered Twitter stream with a bounding box of
(-180,-90)-(180,90) and, for every tweet that carries coordinates, inserts
one row (tid, at, tuser, lat, lon, lang).  Tweets without coordinates, or
with an unparseable ``created_at``, are counted as skipped.  Rows rejected
with an integrity error (e.g. a duplicate key) are silently ignored.
"""
import tweetstream
import psycopg2
from email.utils import parseaddr, parsedate_tz, mktime_tz
from datetime import datetime
import pycurl
import json

# Bound parameters (not Python string interpolation): screen names and
# language codes come from the network and must never be spliced into SQL.
INSERT_SQL = (
    "insert into tweets (tid, at, tuser, lat, lon, lang) "
    "values (%s, to_timestamp(%s), %s, %s, %s, %s);"
)

# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file instead.
conn = psycopg2.connect(database='tweets', user='hunter2', host='localhost')
c = conn.cursor()

# South-west and north-east corners of the bounding box, as "lon,lat".
locations = ["-180,-90", "180,90"]

count = 0    # rows successfully inserted
skipped = 0  # tweets ignored (no coordinates / unparseable date)

try:
    with tweetstream.FilterStream("hunter2", "hunter2",
                                  locations=locations) as stream:
        for tweet in stream:
            # Missing key and empty/None value are both "no location".
            if not tweet.get('coordinates'):
                skipped += 1
                continue

            parsed = parsedate_tz(tweet['created_at'])
            if parsed is None:
                # parsedate_tz returns None for dates it cannot parse.
                skipped += 1
                continue

            tid = int(tweet['id'])
            # mktime_tz already yields seconds since the epoch (UTC); no
            # round trip through a local datetime / strftime('%s') needed.
            at = int(mktime_tz(parsed))
            tuser = tweet['user']['screen_name']
            # The coordinates pair is unpacked as (longitude, latitude).
            lon, lat = tweet['coordinates']['coordinates']
            lang = tweet['user']['lang']

            # A savepoint lets a rejected row be undone on its own; a full
            # conn.rollback() would also discard every insert made since
            # the last commit.
            c.execute("SAVEPOINT tweet_insert")
            try:
                c.execute(INSERT_SQL, (tid, at, tuser, lat, lon, lang))
            except psycopg2.IntegrityError:
                c.execute("ROLLBACK TO SAVEPOINT tweet_insert")
                continue
            c.execute("RELEASE SAVEPOINT tweet_insert")
            count += 1

            # Commit in batches of 100 inserts (at count 1, 101, 201, ...)
            # and report progress as "<inserted> <skipped>".
            if count % 100 == 1:
                conn.commit()
                print("%d %d" % (count, skipped))
finally:
    # Persist the trailing partial batch and release DB resources even if
    # the stream ends or the script is interrupted.
    conn.commit()
    c.close()
    conn.close()