first commit
This commit is contained in:
parent
1e63e9e1cc
commit
5f4d453765
27 changed files with 1864 additions and 0 deletions
15
feed2toot/__init__.py
Normal file
15
feed2toot/__init__.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
52
feed2toot/addtags.py
Normal file
52
feed2toot/addtags.py
Normal file
|
@ -0,0 +1,52 @@
|
|||
#!/usr/bin/env python3
|
||||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# Add as many tags as possible depending on the tweet length
|
||||
'''Add as many tags as possible depending on the tweet length'''
|
||||
|
||||
# standard library imports
|
||||
from operator import itemgetter
|
||||
|
||||
class AddTags(object):
    '''Add as many tags as possible depending on the tweet length'''

    def __init__(self, tweet, tags, maxlength=500):
        '''Constructor of AddTags class

        tweet -- the text of the message, without any tag
        tags -- an iterable of tag strings to append to the message
        maxlength -- maximum number of characters allowed for the final
                     message (tags and separating spaces included)
        '''
        self.tags = tags
        self.tweet = tweet
        self.maxlength = maxlength
        self.main()

    def main(self):
        '''Append to the tweet every tag that still fits in maxlength'''
        tweetlength = len(self.tweet)

        # sort the tags, the longest ones first; the sort is stable so tags of
        # equal length keep their original relative order. A real list is kept
        # (not a one-shot generator) so self.tags stays usable afterwards.
        self.tags = sorted(self.tags, key=len, reverse=True)

        # add tags if space is available
        for tag in self.tags:
            # one extra character for the space separating the tag
            neededlength = len(tag) + 1
            if tweetlength + neededlength <= self.maxlength:
                self.tweet = ' '.join([self.tweet, tag])
                tweetlength += neededlength

    @property
    def finaltweet(self):
        '''return the final tweet with as many tags as possible'''
        return self.tweet
|
105
feed2toot/cliparse.py
Normal file
105
feed2toot/cliparse.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# CLI parsing
|
||||
'''CLI parsing'''
|
||||
|
||||
# standard library imports
|
||||
from argparse import ArgumentParser
|
||||
import glob
|
||||
import logging
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
__version__ = '0.1'
|
||||
|
||||
class CliParse(object):
    '''Parse the command line of feed2toot'''

    def __init__(self):
        '''Constructor for the CliParse class'''
        self.main()

    def main(self):
        '''Parse sys.argv and resolve the configuration files to use

        The parsed namespace is stored in self.opts. After parsing,
        self.opts.cachefile (when given) and self.opts.config have "~"
        expanded and self.opts.configs holds the list of configuration
        files found for every path given with -c.
        Exits the program when a path does not exist or yields no file.
        '''
        feed2tootepilog = 'For more information: https://feed2toot.readthedocs.org'
        feed2tootdescription = 'Take rss feed and send it to Mastodon'
        parser = ArgumentParser(prog='feed2toot',
                                description=feed2tootdescription,
                                epilog=feed2tootepilog)
        parser.add_argument('--version', action='version', version=__version__)
        parser.add_argument('-c', '--config',
                            default=[os.path.join(os.getenv('XDG_CONFIG_HOME', '~/.config'),
                                                  'feed2toot.ini')],
                            nargs='+',
                            dest="config",
                            help='Location of config file (default: %(default)s)',
                            metavar='FILE')
        parser.add_argument('-a', '--all', action='store_true', default=False,
                            dest='all',
                            help='tweet all RSS items, regardless of cache')
        parser.add_argument('-l', '--limit', dest='limit', default=10, type=int,
                            help='tweet only LIMIT items (default: %(default)s)')
        parser.add_argument('--cachefile', dest='cachefile',
                            help='location of the cache file (default: %(default)s)')
        parser.add_argument('-n', '--dry-run', dest='dryrun',
                            action='store_true', default=False,
                            help='Do not actually post tweets')
        parser.add_argument('-v', '--verbose', '--info', dest='log_level',
                            action='store_const', const='info', default='warning',
                            help='enable informative (verbose) output, work on log level INFO')
        parser.add_argument('-d', '--debug', dest='log_level',
                            action='store_const', const='debug', default='warning',
                            help='enable debug output, work on log level DEBUG')
        # every named logging level except NOTSET is a valid --syslog value
        levels = [name for name in logging._nameToLevel
                  if isinstance(name, str) and name != 'NOTSET']
        parser.add_argument('--syslog', nargs='?', default=None,
                            type=str.upper, action='store',
                            const='INFO', choices=levels,
                            help="""log to syslog facility, default: no
                            logging, INFO if --syslog is specified without
                            argument""")
        parser.add_argument('--hashtaglist', dest='hashtaglist',
                            help='a list of hashtag to match')
        parser.add_argument('-p', '--populate-cache', action='store_true', default=False,
                            dest='populate',
                            help='populate RSS entries in cache without actually posting them to Mastodon')
        parser.add_argument('-r', '--rss', help='the RSS feed URL to fetch items from',
                            dest='rss_uri', metavar='http://...')
        parser.add_argument('--rss-sections', action='store_true', default=False,
                            dest='rsssections',
                            help='print the available sections of the rss feed to be used in the tweet template')
        self.opts = parser.parse_args()
        # expand the path to the cache file if defined
        if self.opts.cachefile:
            self.opts.cachefile = os.path.expanduser(self.opts.cachefile)
        # get the different config files, from a directory or from a *.ini style
        self.opts.config = [os.path.expanduser(path) for path in self.opts.config]
        self.opts.configs = self._collect_config_files(self.opts.config)

    @staticmethod
    def _collect_config_files(paths):
        '''Return the configuration files designated by every path of paths

        A directory contributes all its *.ini files, any other path is
        expanded as a glob pattern. The results of all the paths are
        accumulated (a later path no longer replaces the files found for an
        earlier one). Exits the program when a path does not exist or when
        no file is found for it.
        '''
        configs = []
        for element in paths:
            if element and not os.path.exists(element):
                sys.exit('You should provide an existing path for the config file: %s' % element)
            if os.path.isdir(element):
                found = glob.glob(os.path.join(element, '*.ini'))
            else:
                # trying to glob the path
                found = glob.glob(element)
            # verify if a configuration file is provided
            if not found:
                sys.exit('no configuration file was found at the specified path(s) with the option -c')
            configs.extend(found)
        return configs

    @property
    def options(self):
        '''return the namespace holding the parsed command line options'''
        return self.opts
|
231
feed2toot/confparse.py
Normal file
231
feed2toot/confparse.py
Normal file
|
@ -0,0 +1,231 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# Get values of the configuration file
|
||||
'''Get values of the configuration file'''
|
||||
|
||||
# standard library imports
|
||||
from configparser import SafeConfigParser, NoOptionError, NoSectionError
|
||||
import logging
|
||||
import os
|
||||
import os.path
|
||||
import socket
|
||||
import sys
|
||||
|
||||
# 3rd party library imports
|
||||
import feedparser
|
||||
|
||||
class ConfParse(object):
    '''Read the values of every configuration file given on the command line'''

    def __init__(self, clioptions):
        '''Constructor of the ConfParse class

        clioptions -- the parsed command line options; main() reads its
                      configs, rss_uri, cachefile and hashtaglist attributes
        '''
        self.clioptions = clioptions
        self.tweetformat = ''
        self.stringsep = ','
        self.confs = []
        self.main()

    def main(self):
        '''Parse each configuration file and store the result in self.confs

        For every file one tuple (options, config, tweetformat, feeds,
        plugins) is appended to self.confs. Exits the program with an
        explanatory message when a mandatory parameter is missing or invalid.
        '''
        for pathtoconfig in self.clioptions.configs:
            options = {}
            # read the configuration file
            # NOTE(review): SafeConfigParser is a deprecated alias of
            # ConfigParser and no longer exists in Python 3.12; replacing it
            # also requires changing the import at the top of the module.
            config = SafeConfigParser()
            if not config.read(os.path.expanduser(pathtoconfig)):
                sys.exit('Could not read config file')
            ###########################
            #
            # the rss section
            #
            ###########################
            section = 'rss'
            if config.has_section(section):
                ############################
                # tweet option
                ############################
                confoption = 'tweet'
                if config.has_option(section, confoption):
                    self.tweetformat = config.get(section, confoption)
                else:
                    sys.exit('You should define a format for your tweet with the keyword "tweet" in the [rss] section')
                ############################
                # pattern format option
                ############################
                options['patterns'] = {}
                options['patternscasesensitive'] = {}
                for pattern in ['summary_detail', 'published_parsed', 'guidislink', 'authors', 'links', 'title_detail', 'author', 'author_detail', 'comments', 'published', 'summary', 'tags', 'title', 'link', 'id']:
                    currentoption = '{}_pattern'.format(pattern)
                    if config.has_option(section, currentoption):
                        tmppattern = config.get(section, currentoption)
                        if self.stringsep in tmppattern:
                            options['patterns'][currentoption] = [i for i in tmppattern.split(self.stringsep) if i]
                        else:
                            options['patterns'][currentoption] = [tmppattern]

                    # pattern_case_sensitive format
                    currentoption = '{}_pattern_case_sensitive'.format(pattern)
                    if config.has_option(section, currentoption):
                        try:
                            options['patternscasesensitive'][currentoption] = config.getboolean(section, currentoption)
                        except ValueError as err:
                            # invalid boolean: report it and fall back to case sensitive
                            print(err)
                            options['patternscasesensitive'][currentoption] = True
                ############################
                # rsslist
                ############################
                bozoexception = False
                feeds = []
                patterns = []
                currentoption = 'uri_list'
                if config.has_option(section, currentoption):
                    rssfile = config.get(section, currentoption)
                    rssfile = os.path.expanduser(rssfile)
                    if not os.path.exists(rssfile) or not os.path.isfile(rssfile):
                        sys.exit('The path to the uri_list parameter is not valid: {rssfile}'.format(rssfile=rssfile))
                    # read the whole list and close the file right away
                    with open(rssfile, 'r') as rsslistfile:
                        rsslist = rsslistfile.readlines()
                    for line in rsslist:
                        line = line.strip()
                        # split each line in its parts: either the rss link alone, or the
                        # rss link, the rss object to look into and the patterns to look for
                        confobjects = line.split('|')
                        if len(confobjects) > 3 or len(confobjects) == 2:
                            sys.exit('This line in the list of uri to parse is not formatted correctly: {line}'.format(line=line))
                        if len(confobjects) == 3:
                            rss, rssobject, patternstring = confobjects
                        if len(confobjects) == 1:
                            rss = confobjects[0]
                            rssobject = ''
                            patternstring = ''
                        # split different searched patterns
                        patterns = [i for i in patternstring.split(self.stringsep) if i]
                        # retrieve the content of the rss
                        feed = feedparser.parse(rss)
                        if 'bozo_exception' in feed:
                            bozoexception = True
                            logging.warning(feed['bozo_exception'])
                            continue
                        # check if the rss feed and the rss entry are valid ones
                        if 'entries' in feed:
                            if rssobject and rssobject not in feed['entries'][0].keys():
                                sys.exit('The rss object {rssobject} could not be found in the feed {rss}'.format(rssobject=rssobject, rss=rss))
                        else:
                            sys.exit('The rss feed {rss} does not seem to be valid'.format(rss=rss))
                        feeds.append({'feed': feed, 'patterns': patterns, 'rssobject': rssobject})
                    # test if all feeds in the list were unsuccessfully retrieved and if so, leave
                    if not feeds and bozoexception:
                        sys.exit('No feed could be retrieved. Leaving.')
                ############################
                # uri
                ############################
                if not feeds and not self.clioptions.rss_uri:
                    confoption = 'uri'
                    if config.has_option(section, confoption):
                        options['rss_uri'] = config.get('rss', 'uri')
                    else:
                        sys.exit('{confoption} parameter in the [{section}] section of the configuration file is mandatory. Exiting.'.format(section=section, confoption=confoption))
                else:
                    options['rss_uri'] = self.clioptions.rss_uri
                # get the rss feed for rss parameter of [rss] section
                feed = feedparser.parse(options['rss_uri'])
                if not feed:
                    # report the uri that was actually parsed here
                    sys.exit('Unable to parse the feed at the following url: {rss}'.format(rss=options['rss_uri']))

                #########################################
                # no_uri_pattern_no_global_pattern option
                #########################################
                currentoption = 'no_uri_pattern_no_global_pattern'
                # default value
                options['nopatternurinoglobalpattern'] = False
                if config.has_option(section, currentoption):
                    options['nopatternurinoglobalpattern'] = config.getboolean(section, currentoption)
            ###########################
            #
            # the cache section
            #
            ###########################
            section = 'cache'
            if not self.clioptions.cachefile:
                confoption = 'cachefile'
                if config.has_section(section):
                    options['cachefile'] = config.get(section, confoption)
                else:
                    sys.exit('You should provide a {confoption} parameter in the [{section}] section'.format(section=section, confoption=confoption))
                options['cachefile'] = os.path.expanduser(options['cachefile'])
                cachefileparent = os.path.dirname(options['cachefile'])
                if cachefileparent and not os.path.exists(cachefileparent):
                    sys.exit('The parent directory of the cache file does not exist: {cachefileparent}'.format(cachefileparent=cachefileparent))
            else:
                options['cachefile'] = self.clioptions.cachefile
            ### cache limit
            if config.has_section(section):
                confoption = 'cache_limit'
                if config.has_option(section, confoption):
                    try:
                        options['cache_limit'] = int(config.get(section, confoption))
                    except ValueError as err:
                        sys.exit('Error in configuration with the {confoption} parameter in [{section}]: {err}'.format(confoption=confoption, section=section, err=err))
                else:
                    options['cache_limit'] = 100
            else:
                options['cache_limit'] = 100
            ###########################
            #
            # the hashtag section
            #
            ###########################
            section = 'hashtaglist'
            if not self.clioptions.hashtaglist:
                confoption = 'several_words_hashtags_list'
                if config.has_section(section):
                    options['hashtaglist'] = config.get(section, confoption)
                    options['hashtaglist'] = os.path.expanduser(options['hashtaglist'])
                    if not os.path.exists(options['hashtaglist']) or not os.path.isfile(options['hashtaglist']):
                        sys.exit('The path to the several_words_hashtags_list parameter is not valid: {hashtaglist}'.format(hashtaglist=options['hashtaglist']))
                else:
                    options['hashtaglist'] = False
            else:
                # the command line value takes precedence, as for the cache file;
                # without this the 'hashtaglist' key would be missing from options
                options['hashtaglist'] = os.path.expanduser(self.clioptions.hashtaglist)
            ###########################
            #
            # the plugins section
            #
            ###########################
            plugins = {}
            section = 'influxdb'
            if config.has_section(section):
                ###########################################################
                # host, port, user, pass, database and measurement options
                ###########################################################
                plugins[section] = {}
                for currentoption in ['host', 'port', 'user', 'pass', 'database', 'measurement']:
                    if config.has_option(section, currentoption):
                        plugins[section][currentoption] = config.get(section, currentoption)
                # default values of the optional parameters
                if 'host' not in plugins[section]:
                    plugins[section]['host'] = '127.0.0.1'
                if 'port' not in plugins[section]:
                    plugins[section]['port'] = 8086
                if 'measurement' not in plugins[section]:
                    plugins[section]['measurement'] = 'tweets'
                for field in ['user', 'pass', 'database']:
                    if field not in plugins[section]:
                        sys.exit('Parsing error for {field} in the [{section}] section: {field} is not defined'.format(field=field, section=section))

            # create the returned object with previously parsed data
            if feeds:
                self.confs.append((options, config, self.tweetformat, feeds, plugins))
            else:
                self.confs.append((options, config, self.tweetformat, [{'feed': feed, 'patterns': [], 'rssobject': ''}], plugins))

    @property
    def confvalues(self):
        '''Return the values of the different configuration files

        One (options, config, tweetformat, feeds, plugins) tuple per
        configuration file.
        '''
        return self.confs
|
90
feed2toot/filterentry.py
Normal file
90
feed2toot/filterentry.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# Filter an entry of the RSS feeds
|
||||
'''Filter an entry of the RSS feeds'''
|
||||
|
||||
# standard library imports
|
||||
from configparser import SafeConfigParser, NoOptionError, NoSectionError
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
# 3rd party library imports
|
||||
import feedparser
|
||||
|
||||
class FilterEntry(object):
    '''Keep the requested elements of a RSS entry when the entry matches the patterns'''

    def __init__(self, elements, entry, options, byrsspatterns, rssobject):
        '''Constructor of the FilterEntry class

        elements -- names of the entry fields requested by the tweet format
        entry -- the RSS entry, a mapping of field names to values
        options -- mapping with the 'patterns', 'patternscasesensitive' and
                   'nopatternurinoglobalpattern' keys
        byrsspatterns -- patterns specific to the feed of this entry
        rssobject -- name of the entry field the feed-specific patterns
                     are searched in
        '''
        self.matching = {}
        self.entry = entry
        self.elements = elements
        self.options = options
        self.byrsspatterns = byrsspatterns
        self.rssobject = rssobject
        self.main()

    def main(self):
        '''Fill self.matching with the requested elements passing the filters

        Exits the program when a requested element is not a field of the entry.
        '''
        for i in self.elements:
            if i not in self.entry:
                sys.exit('The element {} is not available in the RSS feed. The available ones are: {}'.format(i, [j for j in self.entry]))
            # for the case if no pattern at all is defined
            if not self.options['patterns'] and not self.byrsspatterns and not self.rssobject:
                self.matching[i] = self.entry[i]
            # global filter only
            elif self.options['patterns'] and not self.byrsspatterns and not self.rssobject:
                if not self.options['nopatternurinoglobalpattern']:
                    self.applyglobalfilter(i)
                else:
                    self.matching[i] = self.entry[i]
            # global filter and then by rss filter
            elif self.options['patterns'] and self.byrsspatterns and self.rssobject:
                # patterns by rss
                self.applyglobalfilter(i)
                self.applyspecificfilter(i)
            elif not self.options['patterns'] and self.byrsspatterns and self.rssobject:
                self.applyspecificfilter(i)
            else:
                self.matching[i] = self.entry[i]

    def applyglobalfilter(self, i):
        '''Apply the global filter

        Element i is kept when at least one global pattern is found in the
        entry field its option name designates. Both the case sensitive and
        the case insensitive comparisons look into that same field.
        '''
        casesensitivity = self.options.get('patternscasesensitive', {})
        for patternlist, patterns in self.options['patterns'].items():
            if not patterns:
                continue
            # the searched field is the part of the option name before the
            # first underscore, e.g. 'title' for 'title_pattern'
            fieldvalue = self.entry[patternlist.split('_')[0]]
            # a pattern without an explicit setting is case sensitive
            casesensitive = casesensitivity.get('{}_case_sensitive'.format(patternlist), True)
            if not casesensitive:
                # not case sensitive, so we compare the lower case
                fieldvalue = fieldvalue.lower()
                patterns = [pattern.lower() for pattern in patterns]
            if any(pattern in fieldvalue for pattern in patterns):
                self.matching[i] = self.entry[i]

    def applyspecificfilter(self, i):
        '''Apply specific filters for by-rss pattern matching

        The comparison is always case insensitive and looks into the
        self.rssobject field of the entry.
        '''
        for byrsspattern in self.byrsspatterns:
            byrssfinalpattern = byrsspattern.lower()
            if byrssfinalpattern in self.entry[self.rssobject].lower():
                self.matching[i] = self.entry[i]

    @property
    def finalentry(self):
        '''Return the processed entry'''
        return self.matching
|
204
feed2toot/main.py
Executable file
204
feed2toot/main.py
Executable file
|
@ -0,0 +1,204 @@
|
|||
#!/usr/bin/env python3
|
||||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
"""Checks an RSS feed and posts new entries to Mastodon."""
|
||||
|
||||
# standard libraries imports
|
||||
import codecs
|
||||
import importlib
|
||||
import logging
|
||||
import logging.handlers
|
||||
import os
|
||||
import sys
|
||||
|
||||
# 3rd party libraries imports
|
||||
import feedparser
|
||||
from persistentlist import PersistentList
|
||||
import tweepy
|
||||
|
||||
# app libraries imports
|
||||
from feed2toot.addtags import AddTags
|
||||
from feed2toot.cliparse import CliParse
|
||||
from feed2toot.confparse import ConfParse
|
||||
from feed2toot.filterentry import FilterEntry
|
||||
from feed2toot.removeduplicates import RemoveDuplicates
|
||||
from feed2toot.tootpost import TootPost
|
||||
|
||||
class Main(object):
    '''Main class of Feed2toot'''

    def __init__(self):
        '''Constructor of the Main class, runs the whole program'''
        self.main()

    def setup_logging(self, options):
        '''Configure the root logger from the command line options

        options -- the parsed command line options; its syslog attribute
                   (a level name or a false value) and its log_level
                   attribute (level name for the stream handler) are read
        Raises ValueError when options.syslog is not a known level name.
        '''
        if options.syslog:
            sl = logging.handlers.SysLogHandler(address='/dev/log')
            sl.setFormatter(logging.Formatter('feed2toot[%(process)d]: %(message)s'))
            # convert syslog argument to a numeric value
            loglevel = getattr(logging, options.syslog.upper(), None)
            if not isinstance(loglevel, int):
                # report the name given by the user, loglevel is None here
                raise ValueError('Invalid log level: %s' % options.syslog)
            sl.setLevel(loglevel)
            logging.getLogger('').addHandler(sl)
            logging.debug('configured syslog level %s' % loglevel)
        # the root logger lets everything through, each handler filters by its own level
        logging.getLogger('').setLevel(logging.DEBUG)
        sh = logging.StreamHandler()
        sh.setLevel(options.log_level.upper())
        logging.getLogger('').addHandler(sh)
        logging.debug('configured stdout level %s' % sh.level)

    @staticmethod
    def _entry_hashtags(entry, severalwordshashtags):
        '''Return the list of '#tag' strings built from entry['tags']

        entry -- a feed entry with a 'tags' list of mappings having a 'term' key
        severalwordshashtags -- multi-word expressions to collapse into a
                                single word, may be empty
        '''
        hashtags = []
        # once an expression was found in one tag of the entry, the flag stays
        # set for the following tags of that entry
        severalwordsinhashtag = False
        for tag in entry['tags']:
            prehashtags = tag['term']
            tmphashtags = prehashtags
            for element in severalwordshashtags:
                if element in prehashtags:
                    severalwordsinhashtag = True
                    tmphashtags = prehashtags.replace(element,
                                                      ''.join(element.split()))
            if severalwordsinhashtag:
                # remove the characters stopping a word from being a hashtag
                for char in ("'", "-", ".", " "):
                    tmphashtags = tmphashtags.replace(char, "")
                hashtags.append('#{}'.format(tmphashtags))
            else:
                # remove space from hashtag
                hashtags.append('#{}'.format(prehashtags.replace(" ", "")))
        return hashtags

    @staticmethod
    def _template_elements(tweetformat):
        '''Return the names of the space separated {name} fields of tweetformat'''
        return [i.strip('{}') for i in tweetformat.split(' ')
                if i.startswith('{') and i.endswith('}')]

    @staticmethod
    def _run_plugins(plugins, finaltweet):
        '''Instantiate the plugin class of every configured plugin with the sent message'''
        for plugin in plugins:
            capitalizedplugin = plugin.title()
            pluginclassname = '{plugin}Plugin'.format(plugin=capitalizedplugin)
            pluginmodulename = 'feed2toot.plugins.{pluginmodule}'.format(pluginmodule=pluginclassname.lower())
            try:
                pluginmodule = importlib.import_module(pluginmodulename)
                pluginclass = getattr(pluginmodule, pluginclassname)
                pluginclass(plugins[plugin], finaltweet)
            except ImportError as err:
                print(err)

    def main(self):
        """The main function.

        Parses the command line and the configuration files, then for every
        feed of every configuration builds a message for the selected entries
        and, unless in dry-run or populate mode, posts it.
        """
        clip = CliParse()
        clioptions = clip.options
        self.setup_logging(clioptions)
        # iterating over the different configuration files
        cfgp = ConfParse(clioptions)
        confs = cfgp.confvalues
        for conf in confs:
            options, config, tweetformat, feeds, plugins = conf[0:5]
            # open the persistent list
            cache = PersistentList(options['cachefile'][0:-3], options['cache_limit'])
            # the cache is closed whatever happens below, including sys.exit
            try:
                # the list may come from the configuration or from the command line;
                # it stays empty when none is defined
                hashtaglist = options.get('hashtaglist') or clioptions.hashtaglist
                severalwordshashtags = []
                if hashtaglist:
                    with codecs.open(hashtaglist, encoding='utf-8') as hashtagfile:
                        severalwordshashtags = [i.rstrip('\n') for i in hashtagfile.readlines()]
                for feed in feeds:
                    # reverse feed entries because most recent one should be sent as the last one in Mastodon
                    entries = feed['feed']['entries'][0:clioptions.limit]
                    entries.reverse()
                    # --rss-sections option: print rss sections and exit
                    if clioptions.rsssections:
                        if entries:
                            print('The following sections are available in this RSS feed: {}'.format([j for j in entries[0]]))
                            sys.exit(0)
                        else:
                            sys.exit('Could not parse the section of the rss feed')
                    # unless --all is given, only keep the entries not yet in the cache
                    if not clioptions.all:
                        totweet = [i for i in entries if 'id' in i and i['id'] not in cache]
                    else:
                        totweet = entries

                    for entry in totweet:
                        if 'id' not in entry:
                            # malformed feed entry, skip
                            continue
                        logging.debug('found feed entry %s, %s', entry['id'], entry['title'])

                        rss = {
                            'id': entry['id'],
                        }

                        # lets see if the rss feed has hashtag
                        if 'tags' in entry:
                            rss['hashtags'] = self._entry_hashtags(entry, severalwordshashtags)

                        # match elements of the tweet format string with available element in the RSS feed
                        elements = self._template_elements(tweetformat)
                        fe = FilterEntry(elements, entry, options, feed['patterns'], feed['rssobject'])
                        entrytosend = fe.finalentry
                        if entrytosend:
                            tweetwithnotag = tweetformat.format(**entrytosend)
                            # remove duplicates from the final tweet
                            dedup = RemoveDuplicates(tweetwithnotag)
                            # only append hashtags if they exist
                            # remove last tags if tweet too long
                            if 'hashtags' in rss:
                                addtag = AddTags(dedup.finaltweet, rss['hashtags'])
                                finaltweet = addtag.finaltweet
                            else:
                                finaltweet = dedup.finaltweet

                        if clioptions.dryrun:
                            if entrytosend:
                                logging.warning('Tweet should have been sent: {tweet}'.format(tweet=finaltweet))
                            else:
                                logging.debug('This rss entry did not meet pattern criteria. Should have not been sent')
                        else:
                            storeit = True
                            if entrytosend and not clioptions.populate:
                                logging.debug('sending the following tweet:{tweet}'.format(tweet=finaltweet))
                                twp = TootPost(config, finaltweet)
                                storeit = twp.storeit()
                            else:
                                logging.debug('populating RSS entry {}'.format(rss['id']))
                            # in both cases we store the id of the sent tweet
                            if storeit:
                                cache.append(rss['id'])
                            # plugins
                            if plugins and entrytosend:
                                self._run_plugins(plugins, finaltweet)
            finally:
                # do not forget to close cache (shelf object)
                cache.close()
|
15
feed2toot/plugins/__init__.py
Normal file
15
feed2toot/plugins/__init__.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
42
feed2toot/plugins/influxdbplugin.py
Normal file
42
feed2toot/plugins/influxdbplugin.py
Normal file
|
@ -0,0 +1,42 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright © 2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# Push values to a influxdb database
|
||||
'''Push values to a influxdb database'''
|
||||
|
||||
# standard libraries imports
|
||||
import json
|
||||
|
||||
# 3rd party libraries imports
|
||||
from influxdb import InfluxDBClient
|
||||
|
||||
class InfluxdbPlugin(object):
    '''Plugin pushing one value to an InfluxDB database'''

    def __init__(self, plugininfo, data):
        '''Constructor of the InfluxdbPlugin class

        plugininfo -- mapping read for the keys 'host', 'port', 'user',
                      'pass', 'database' and 'measurement'
        data -- the value written as the 'value' field of the point

        The client is created and the point is written straight away,
        because the constructor ends by calling main().
        '''
        self.plugininfo = plugininfo
        self.data = data
        self.datatoinfluxdb = []
        # connection settings are passed positionally, in this exact order
        host = self.plugininfo['host']
        port = self.plugininfo['port']
        user = self.plugininfo['user']
        password = self.plugininfo['pass']
        database = self.plugininfo['database']
        self.client = InfluxDBClient(host, port, user, password, database)
        self.main()

    def main(self):
        '''Queue a single point holding the data and write it with the client'''
        point = {
            'measurement': self.plugininfo['measurement'],
            'fields': {'value': self.data},
        }
        self.datatoinfluxdb.append(point)
        self.client.write_points(self.datatoinfluxdb)
|
63
feed2toot/removeduplicates.py
Normal file
63
feed2toot/removeduplicates.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
#!/usr/bin/env python3
|
||||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
# Remove duplicates from the final string before sending the tweet
|
||||
'''Remove duplicates from the final string before sending the tweet'''
|
||||
|
||||
class RemoveDuplicates(object):
    '''Remove duplicates from the final string before sending the tweet'''

    def __init__(self, tweet):
        '''Constructor of RemoveDuplicates class

        tweet -- the text to clean up. The clean-up runs immediately (the
                 constructor calls main()) and the result is exposed by
                 the finaltweet property.
        '''
        self.tweet = tweet
        self.main()

    def main(self):
        '''Main of the RemoveDuplicates class

        Keep the first occurrence of every link (a whitespace-delimited
        token starting with http:// or https://), drop every later
        occurrence of the same link, then collapse runs of spaces into a
        single space. The result is stored back in self.tweet.
        '''
        text = self.tweet
        seenlinks = set()
        kept = []
        cursor = 0
        # Work on whole tokens instead of str.replace() on substrings:
        # replacing substrings corrupts a link that merely starts with a
        # duplicated link (http://a.com vs http://a.com/page) and requires
        # a placeholder string that may already be present in the text.
        for token in text.split():
            # only whitespace lies between cursor and the next token, so
            # index() necessarily lands on the start of this very token
            start = text.index(token, cursor)
            # keep the original whitespace (newlines included) untouched
            kept.append(text[cursor:start])
            cursor = start + len(token)
            if token.startswith(('http://', 'https://')):
                if token in seenlinks:
                    # duplicate link: drop it, keep the first occurrence
                    continue
                seenlinks.add(token)
            kept.append(token)
        kept.append(text[cursor:])
        text = ''.join(kept)
        # remove all 2xspaces; loop because removing several adjacent
        # duplicates leaves runs longer than two spaces
        while '  ' in text:
            text = text.replace('  ', ' ')
        self.tweet = text

    @property
    def finaltweet(self):
        '''return the final tweet after duplicates were removed'''
        return self.tweet
|
51
feed2toot/tootpost.py
Normal file
51
feed2toot/tootpost.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
# vim:ts=4:sw=4:ft=python:fileencoding=utf-8
|
||||
# Copyright © 2015-2017 Carl Chenet <carl.chenet@ohmytux.com>
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
||||
|
||||
"""Checks an RSS feed and posts new entries to Mastodon."""
|
||||
|
||||
# standard libraries imports
|
||||
from configparser import SafeConfigParser, NoOptionError, NoSectionError
|
||||
from argparse import ArgumentParser
|
||||
import codecs
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
# 3rd party libraries imports
|
||||
import feedparser
|
||||
import tweepy
|
||||
from mastodon import Mastodon
|
||||
|
||||
class TootPost:
    '''Send a toot to Mastodon'''

    def __init__(self, config, toot):
        '''Constructor of the TootPost class

        config -- configuration object; its get() method is called for
                  the 'client_credentials' and 'user_credentials' options
                  of the 'mastodon' section
        toot -- the text handed to Mastodon

        The toot is sent right away because the constructor calls main().
        '''
        self.toot = toot
        self.config = config
        # storeit() returns this flag; this class never sets it to False
        self.store = True
        self.main()

    def main(self):
        '''Main of the TootPost class: create the Mastodon client and toot'''
        section = 'mastodon'
        clientcredentials = self.config.get(section, 'client_credentials')
        usercredentials = self.config.get(section, 'user_credentials')
        client = Mastodon(client_id=clientcredentials,
                          access_token=usercredentials)
        client.toot(self.toot)

    def storeit(self):
        '''Indicate if the toot should be stored or not'''
        return self.store
|
Loading…
Add table
Add a link
Reference in a new issue