main.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
  3. import argparse
  4. import logging
  5. import base64
  6. import datetime
  7. import getpass
  8. import json
  9. import os
  10. import re
  11. import subprocess
  12. import mastodon
  13. import feedparser
  14. import requests
  15. # =============================================================================
  16. # CONSTANTS
  17. # =============================================================================
  18. BIRDSITELINKBOT_VERSION_NUMBER = "dev"
  19. BIRDSITELINKBOT_VERSION = "Birdsitelink-Bot - v" + BIRDSITELINKBOT_VERSION_NUMBER
  20. BIRDSITELINKBOT_APP_NAME = "Birdsitelink-Bot"
  21. BIRDSITELINKBOT_TWITRSSME_BASEURL= "http://twitrss.me/twitter_user_to_rss/?user={user}"
  22. BIRDSITELINKBOT_DEFAULT_DATA = { "instance": "birdsite.link",
  23. "client_id": None,
  24. "client_secret": None,
  25. "mirrored": [], } # "mirrored" : [
  26. # { "handle": "tim" ,
  27. # "access_token": 1,
  28. # "lastweet": 12345,
  29. # "last_rt": None }
  30. # ]
  31. # =============================================================================
  32. # ARGUMENTS
  33. # =============================================================================
  34. parser = argparse.ArgumentParser()
  35. parser.add_argument( '-V', '--version',
  36. action = 'store_true',
  37. help = 'show bot version' )
  38. parser.add_argument( '-v', '--verbose',
  39. action = 'count',
  40. default = 0,
  41. help = 'more verbose logs; repeat to increase verbosity' )
  42. parser.add_argument( '-s', '--simulate',
  43. action = 'store_true',
  44. help = 'do not actually post anything' )
  45. parser.add_argument( '-d', '--data-file',
  46. default = 'mstdn_birdsitelinkbot.data',
  47. help = 'the data file' )
  48. parser.add_argument( '-I', '--instance',
  49. default = 'birdsite.link',
  50. help = 'the instance on which the users tweets should be mirrored' )
  51. parser.add_argument( '-O', '--no-old-tweets',
  52. action = 'store_true',
  53. help = 'to use with add command ; do not mirror tweets published before right now' )
  54. parser.add_argument( 'command', help='command to use', choices=['run', 'add'] )
  55. parser.add_argument( 'handle', nargs='?', help='handle to add or run' )
  56. args = parser.parse_args()
  57. # =============================================================================
  58. # LOGGING
  59. # =============================================================================
  60. log = logging.getLogger("Birdsitelink-Bot")
  61. log.setLevel(logging.DEBUG)
  62. log_handler_console = logging.StreamHandler()
  63. if args.verbose > 1:
  64. log_handler_console.setLevel(logging.DEBUG)
  65. elif args.verbose > 0:
  66. log_handler_console.setLevel(logging.INFO)
  67. else:
  68. log_handler_console.setLevel(logging.WARNING)
  69. try:
  70. import pycolog
  71. log_formatter_console = pycolog.ColoredFormatter("%(message)s")
  72. except ImportError:
  73. log_formatter_console = logging.Formatter("%(name)s:%(levelname)s: %(message)s")
  74. log_handler_console.setFormatter(log_formatter_console)
  75. log.addHandler(log_handler_console)
  76. # =============================================================================
  77. # FUNCTIONS
  78. # =============================================================================
  79. # -----------------------------------------------------------------------------
  80. def register(instance, login, password, data):
  81. """Register the application with the required name"""
  82. instance_url = "https://{}".format(instance)
  83. if data["client_id"] is None or data["client_secret"] is None :
  84. log.warning("App needs registering")
  85. client_id, client_secret = \
  86. mastodon.Mastodon.create_app( BIRDSITELINKBOT_APP_NAME,
  87. api_base_url = instance_url )
  88. data["client_id"] = client_id
  89. data["client_secret"] = client_secret
  90. mstdn = mastodon.Mastodon( client_id = data["client_id"],
  91. client_secret = data["client_secret"],
  92. api_base_url = instance_url )
  93. log.warning("Getting access token for {}".format(login))
  94. access_token = mstdn.log_in( username = login,
  95. password = password )
  96. return access_token
  97. # -----------------------------------------------------------------------------
  98. def load_data(datafile, default=BIRDSITELINKBOT_DEFAULT_DATA):
  99. data = {}
  100. try:
  101. log.debug("Loading data from {}".format(datafile))
  102. with open(datafile, 'r') as file:
  103. data = json.load(file)
  104. except FileNotFoundError:
  105. log.warning("No data file {} found".format(args.data_file))
  106. # Set default values if they do not exist
  107. for d in default:
  108. if not d in data:
  109. log.warning("'{}' not in data, adding default value".format(d))
  110. data[d] = default[d]
  111. return data
  112. # -----------------------------------------------------------------------------
  113. def save_data(datafile, data):
  114. log.info("Saving data file {}".format(datafile))
  115. with open(datafile,'w') as file:
  116. json.dump(data,file)
  117. # =============================================================================
  118. # SCRIPT
  119. # =============================================================================
  120. # Load data
  121. # -----------------------------------------------------------------------------
  122. data = load_data( args.data_file )
  123. # Fill out empty arguments with data & data from arguments
  124. # -----------------------------------------------------------------------------
  125. if args.instance is None:
  126. args.instance = data["instance"]
  127. if data["instance"] != args.instance:
  128. data["instance"] = args.instance
  129. save_data( args.data_file, data )
  130. log.debug("Command used is '{}'".format(args.command))
  131. # ADD
  132. # -----------------------------------------------------------------------------
  133. if args.command == "add" :
  134. # If there's no handle given on the command line, we ask for it.
  135. if args.handle == None:
  136. args.handle = input("Twitter user you want to add: @")
  137. # Remove the leading "@" if needs be
  138. if args.handle[0] == "@":
  139. args.handle = args.handle[1:]
  140. # Now that we know the Twitter handle, we can deduce the Mastodon login
  141. # (and fullhandle, those are the same here)
  142. login = "{}@{}".format( args.handle, args.instance )
  143. # Check if we need to really add the user
  144. for user in data["mirrored"]:
  145. if ( user["handle"] == args.handle
  146. and"access_token" in user ) :
  147. log.warning("User {} is already mirrored".format(args.handle))
  148. exit(0)
  149. log.warning("No access token for user {}, registering now".format(login) )
  150. password = getpass.getpass( prompt = "Password for {}: ".format(login) )
  151. if not args.simulate:
  152. access_token = register( args.instance, login, password, data )
  153. # Add handle to data
  154. log.info("Adding {} to the list of mirrored accounts".format(login) )
  155. if not args.simulate:
  156. # TODO update bio
  157. lastweet = 0
  158. if args.no_old_tweets:
  159. lastweet = int( datetime.datetime.now().timestamp() )
  160. data['mirrored'].append ( { "handle" : args.handle,
  161. "access_token" : access_token,
  162. "lastweet" : lastweet, } )
  163. save_data( args.data_file, data )
  164. # RUN
  165. # -----------------------------------------------------------------------------
  166. if args.command == "run":
  167. for user in data["mirrored"]:
  168. handle = user["handle"]
  169. lastweet = datetime.datetime.fromtimestamp( user["lastweet"] )
  170. # Now that we know the Twitter handle, we can deduce the Mastodon login
  171. # (and fullhandle, those are the same here)
  172. login = "{}@{}".format( args.handle, args.instance )
  173. log.debug( "Mirroring user @{} (last seen: {})" \
  174. .format( handle, lastweet.strftime("%Y-%m-%d %H:%M:%S") ) )
  175. if not args.simulate:
  176. mstdn = mastodon.Mastodon( client_id = data["client_id"],
  177. client_secret = data["client_secret"],
  178. access_token = user["access_token"],
  179. api_base_url = data["instance"] )
  180. # Get tweets
  181. log.debug("Getting tweets feed")
  182. twitrssme_file = os.path.join( "twitrssme",
  183. "fcgi",
  184. "twitter_user_to_rss.pl" )
  185. if os.path.exists( twitrssme_file ):
  186. session = subprocess.Popen( [ "perl",
  187. twitrssme_file,
  188. "user={}".format(handle) ],
  189. stdout = subprocess.PIPE,
  190. stderr = subprocess.PIPE )
  191. stdout, stderr = session.communicate()
  192. if stderr:
  193. log.error( "Something went wrong with {}:"\
  194. .format(twitrssme_file) )
  195. log.error( stderr )
  196. log.warning( "Using online service at {}" \
  197. .format( BIRDSITELINKBOT_TWITRSSME_BASEURL ) )
  198. tweets = feedparser.parse( BIRDSITELINKBOT_TWITRSSME_BASEURL \
  199. .format( user = handle ) )
  200. else:
  201. stdout = stdout.decode("utf8")
  202. xmlstart = stdout.find("\n\n") +2
  203. tweets = feedparser.parse( stdout[xmlstart:] )
  204. else:
  205. log.warning("No {}, using online service at {}" \
  206. .format( twitrssme_file,
  207. BIRDSITELINKBOT_TWITRSSME_BASEURL ) )
  208. tweets = feedparser.parse( BIRDSITELINKBOT_TWITRSSME_BASEURL \
  209. .format( user = handle ) )
  210. # Update PP
  211. try:
  212. image_url = tweets.feed.image.href
  213. if not "image" in user or user["image"] != image_url:
  214. log.debug("Updating @{} PP".format(handle))
  215. extension = image_url.split(".")[-1]
  216. image = requests.get( image_url )
  217. image = base64.b64encode( image.content )
  218. image = "data:image/{};base64,{}".format( extension,
  219. image.decode("ascii") )
  220. if not args.simulate:
  221. mstdn.account_update_credentials( avatar = image )
  222. user["image"] = image_url
  223. save_data( args.data_file, data )
  224. except Exception as e:
  225. log.error("Something went wrong when trying to update @{} PP".format(handle))
  226. log.error(e)
  227. # Treat tweets
  228. start_mirroring = False
  229. for t in reversed(tweets.entries):
  230. t_date = datetime.datetime( t.published_parsed.tm_year,
  231. t.published_parsed.tm_mon,
  232. t.published_parsed.tm_mday,
  233. t.published_parsed.tm_hour,
  234. t.published_parsed.tm_min,
  235. t.published_parsed.tm_sec )
  236. # Check if the tweet is a RT. This will impact how we check the
  237. # tweet date (the date of a RT is the date of it's first
  238. # publication, not the date the RT occured).
  239. is_retweet = False
  240. if t.author != "(@{})".format( handle ):
  241. is_retweet = True
  242. if not is_retweet and t_date < lastweet:
  243. log.debug("Skipping older tweet...")
  244. continue
  245. # If this is not a retweet, and it was published later than
  246. # lastweet, this means we can activate start_mirroring (meaning the
  247. # following RT will be published nevermind their publication date).
  248. if not is_retweet:
  249. start_mirroring = True
  250. if not is_retweet and t_date == lastweet:
  251. log.debug("Skipping last mirrored tweet...")
  252. continue
  253. # Likewise, if last_rt is set it means the last thing we mirrored
  254. # was a RT. If we find that RT, it means we can start mirroring.
  255. if not start_mirroring and is_retweet and "last_rt" in user and user["last_rt"] == t.link:
  256. start_mirroring = True
  257. log.debug("Skipping last mirrored RT...")
  258. continue
  259. if is_retweet and not start_mirroring:
  260. log.debug("Skipping older RT...")
  261. continue
  262. log.info("New tweet from @{} at: {}" \
  263. .format( handle,
  264. t_date.strftime("%Y-%m-%d %H:%M:%S") ) )
  265. try:
  266. toot_text = t.title
  267. # Handle retweets
  268. if is_retweet:
  269. log.info("This is a RT from {}".format(t.author[1:-1]))
  270. toot_text = "RT {}@{}\n{}".format( t.author[1:-1],
  271. data["instance"],
  272. toot_text)
  273. # Handle medias
  274. try:
  275. log.debug("Handling medias...")
  276. toot_media = []
  277. for link in re.finditer( r"https://pbs.twimg.com/[^ \xa0\"]*", t.summary ):
  278. r = requests.get(link.group(0))
  279. log.debug("Adding media to toot")
  280. if not args.simulate:
  281. m = mstdn.media_post( r.content,
  282. mime_type = r.headers.get('content-type') )
  283. toot_media.append(m['id'])
  284. except Exception as e:
  285. log.error("Something went wrong with media")
  286. log.error(e)
  287. # Handle twitter links redirections
  288. try:
  289. link = re.search( r"http[^ \xa0]*", toot_text )
  290. if link != None:
  291. log.debug("Handling twitter links redirections...")
  292. r = requests.get(link.group(0), allow_redirects=False)
  293. if r.status_code in {301,302}:
  294. toot_text = toot_text.replace( link.group(0),
  295. r.headers.get('Location') )
  296. except Exception as e:
  297. log.error("Something went wrong with redirections")
  298. log.error(e)
  299. # Remove pic.twitter.com links
  300. link = re.search(r"pic.twitter.com[^ \xa0]*", toot_text )
  301. if link != None:
  302. log.debug( "Removing pic.twitter.com links..." )
  303. toot_text = toot_text.replace(link.group(0),' ')
  304. # Remove ellipsis
  305. toot_text = toot_text.replace('\xa0…',' ')
  306. # Add tweet source link
  307. toot_text = "{}\n\n{}".format(toot_text, t.link)
  308. # Finally, posting
  309. log.debug("Tooting with medias {}:".format(str(toot_media)))
  310. log.info( toot_text )
  311. if not args.simulate:
  312. toot = mstdn.status_post( toot_text,
  313. in_reply_to_id = None,
  314. media_ids = toot_media,
  315. sensitive = False,
  316. visibility = "public",
  317. spoiler_text = None )
  318. # Update data & save in case we are interrupted
  319. if not is_retweet:
  320. user["lastweet"] = int( t_date.timestamp() )
  321. user["last_rt"] = None
  322. else:
  323. user["lastweet"] = user["lastweet"] +1
  324. user["last_rt"] = t.link
  325. save_data( args.data_file, data )
  326. except Exception as e:
  327. log.error("Uh oh, something went wrong")
  328. log.error(e)