# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
+import json
import logging
+import re
import threading
import time
+# Module-level state shared by the streaming helpers in this file.
+# Streamer instance created by start_stream(); None until the stream starts.
_twitter = None
+# NOTE(review): declared global in start_stream(), but its assignment is not
+# visible in this fragment -- presumably the thread running the stream; confirm.
_thread = None
+# Case-insensitive regex compiled from the stream terms by start_stream() and
+# read by check_match(); None until start_stream() has been given terms.
+_match_pattern = None
class Streamer(TwythonStreamer):
count = 0
#logger.info("Stream text: "+data['text'])
#logger.info("Stream tweet: {}".format(data))
tweet = Tweet.from_status(data)
+ tweet.conforms_to_terms = check_match(data)
tweet.save()
self.count += 1
if self.count%100 == 0:
def start_stream(terms):
global _twitter
global _thread
+ global _match_pattern
logger.info("Starting Twitter stream for {}...".format(terms))
if len(terms)==0:
logger.warning("No terms given for twitter stream, not starting stream...")
return
+
+ _match_pattern = re.compile("|".join(terms), flags=re.IGNORECASE)
+
_twitter = Streamer(settings.TWEET_OAUTH_CONSUMER_KEY,
settings.TWEET_OAUTH_CONSUMER_SECRET,
settings.TWEET_OAUTH_ACCESS_TOKEN,
time.sleep(5)
start_stream(terms)
- logger.info('Running threads:')
- for t in threading.enumerate():
- logger.info(" {}".format(t.name))
-
-
def export_tweets(filename):
+    # Export stored tweets through an ExcelExporter created for `filename`:
+    # each selected tweet is passed to exporter.add_tweet(), then the
+    # exporter is closed.
exporter = ExcelExporter(filename)
- tweets = Tweet.objects.all()
+    # Only tweets flagged conforms_to_terms=True are exported; the streamer
+    # sets that flag from check_match() before saving each tweet.
+    # NOTE(review): order_by('-pk') looks like a Django queryset call, i.e.
+    # descending primary key -- confirm Tweet is a Django model.
+ tweets = Tweet.objects.filter(conforms_to_terms=True).order_by('-pk')
for tweet in tweets:
exporter.add_tweet(tweet)
exporter.close()
+
+# Entity kinds inspected by check_match(), each mapped to the fields of one
+# entity of that kind that are searched for the stream terms.
+_ENTITY_FIELDS = (
+    ("urls", ("display_url", "expanded_url")),
+    ("media", ("display_url", "expanded_url")),
+    ("hashtags", ("text",)),
+    ("user_mentions", ("screen_name", "name")),
+)
+
+
+def _candidate_strings(status):
+    """Yield every value of *status* that check_match() searches.
+
+    Values are yielded lazily, text first, so the caller can stop at the
+    first hit.  Missing keys yield None instead of raising KeyError.
+    """
+    yield status.get("text")
+    # "entities" may be absent or null in a payload; treat both as empty.
+    entities = status.get("entities") or {}
+    for kind, fields in _ENTITY_FIELDS:
+        for entity in entities.get(kind) or ():
+            for field in fields:
+                yield entity.get(field)
+
+
+def check_match(status):
+    """Return True if any stream term occurs in the tweet *status*.
+
+    The module-level ``_match_pattern`` (compiled by ``start_stream()``) is
+    searched against the tweet text and against the URL, media, hashtag and
+    user-mention entities listed in ``_ENTITY_FIELDS``.
+
+    Args:
+        status: mapping holding the decoded tweet; it is read with string
+            keys such as "text", "entities" and "created_at".
+
+    Returns:
+        True on the first field that matches, otherwise False.  False is
+        also returned when no pattern has been compiled yet.  A non-match is
+        reported through ``logger.debug``.
+    """
+    pattern = _match_pattern
+    if pattern is None:
+        # start_stream() has not compiled the terms yet, so nothing can match.
+        return False
+
+    for value in _candidate_strings(status):
+        # Absent or null fields are skipped rather than passed to search().
+        if value is not None and pattern.search(value) is not None:
+            return True
+
+    # Lazy logger arguments: same output as concatenation, but a missing
+    # "created_at" or "text" no longer raises while building the message.
+    logger.debug("Terms NOT FOUND in tweet:")
+    logger.debug(" created_at: %s", status.get("created_at"))
+    logger.debug(" text: %s", status.get("text"))
+    return False