#

 
# Settings for LiWa3
#
# Syntax is as follows: 
#   * EVERY line starting with a "#" character is disregarded
#   * Every non-blank line is in the format "Setting=value"
#
# If you add channels to the <rcchannels> (separate channels with a space), 
#   then you have to command 'LiWa3 restart diffreader' to make the bot listen to the new channels.

# If you change one of the settings below, say '!reset' to make the bot apply the new settings (certain parts of the bot will have some lag)

# IRC channel settings.  The '#' inside a value is part of the channel name; only lines that START
#   with a '#' are disregarded as comments.
# NOTE(review): the exact role of each channel is only suggested by the setting name - confirm against the bot code.
ircreportchannel=#cvn-sw-spam
ircbotchannel=#wikipedia-spam-bot-channel
ircalertchannel=#cvn-sw-spam
ircelchannel=#wikimedia-external-links
# List of channels, separate with |
ircelchannels=#cvn-wp-es|#BeetstraBotChannel|#wikipedia-en-spam|#wikipedia-spam-t|#wikidata-spam|#wikimedia-external-links

# List of wikis (name as in the url), separate with |
# NOTE(review): presumably the wikis that get their own channels in the per-wiki settings - confirm.
ownchannel=es.wikipedia.org|en.wikipedia.org|www.wikidata.org

# Per-wiki channels: format of parameter name is '<type>.<wiki>', the value is an IRC channel.
# Types used below are 'spam', 'alert', 'warning' and 'warn'.
# Entries starting with a '#' are disabled (commented out).
# NOTE(review): the difference between the 'warning' and 'warn' types is not visible here - confirm against the bot code.
spam.en.wikipedia.org=#wikipedia-en-spam
alert.en.wikipedia.org=#wikipedia-spam-t
warning.en.wikipedia.org=#wikipedia-spam-t
warn.en.wikipedia.org=#wikipedia-spam-bot-channel

#spam.fa.wikipedia.org=#cvn-wp-fa
#alert.fa.wikipedia.org=#cvn-wp-fa
#warning.fa.wikipedia.org=#cvn-wp-fa

warn.commons.wikimedia.org=#wikipedia-spam-bot-channel

#alert.es.wikipedia.org=#cvn-wp-es
#warning.es.wikipedia.org=#cvn-wp-es

#spam.de.wikipedia.org=#cvn-wp-de
#alert.de.wikipedia.org=#cvn-wp-de
#warning.de.wikipedia.org=#cvn-wp-de

spam.www.wikidata.org=#wikidata-spam
alert.www.wikidata.org=#wikidata-spam
warning.www.wikidata.org=#wikidata-spam

# These are the wikis which have an XLinkBot blacklist, separate with |
warningwikis=en.wikipedia.org|commons.wikimedia.org

# This is where the local blacklists are on the wiki, format of parameter name is '<wiki>_blacklist', more lists can be
#  specified, separate with |
# The '<wiki>_overridelist' and '<wiki>_hardoverridelist' parameters below use the same format (page names, separate with |).
en.wikipedia.org_blacklist=User:XLinkBot/RevertList|User:XLinkBot/RevertReferencesList|User:COIBot/RevertList|User:MER-C/RevertList.css
en.wikipedia.org_overridelist=User:XLinkBot/OverrideList|User:MER-C/OverrideList.css|User:COIBot/OverrideList
en.wikipedia.org_hardoverridelist=User:XLinkBot/HardOverrideList|User:MER-C/HardOverrideList.css|User:COIBot/HardOverrideList

# List of user groups, separate with |
# NOTE(review): presumably editors in one of these groups are regarded as trusted - confirm what that implies in the bot.
trustedgroups=sysop|bot|bureaucrat|rollbacker|founder|steward|checkuser|editor

# List of numbers, separate with |
# NOTE(review): presumably the numbers of the namespaces the bot watches - confirm.
watchedspaces=0|2|6|10|14|108|118

# See http://en.wikipedia.org/w/api.php?action=sitematrix
# The bot will read all that are mentioned behind 'site url="http://'
# The selection of channels from 'special' needs to be specified here.  Separate with space.
special=#commons.wikimedia #meta.wikimedia #species.wikimedia #strategy.wikimedia #mediawiki.wikipedia #wikidata.wikipedia #outreach.wikipedia #nyc.wikimedia

# Reporter sleep time between posting a next report.
# This number has to be tweaked according to the number of reporters active,
#  and the number of link-addition edits per minute (type: "LiWa3_1 !info" on IRC).
# Example: 20 reported link additions per minute with 5 Reporters on IRC would give each of them 4 reports per minute,
#   or one every 15 seconds.  The sleeptime should then be smaller than 15 seconds (so it has also some time to work).
#   If it builds up a Queue (number after 'M'), either this time is too long, or one needs an extra reporter.
#   Setting this time too short may result in flooding.
linkreportersleeptime=1

# NOTE(review): the three sleep times below are not described in this file.  Going by the names they are the
#   equivalent sleep times for the warner, parser and analyser parts of the bot - confirm meaning and unit.
linkwarnersleeptime=1

linkparsersleeptime=1

linkanalysersleeptime=1

# Use the parsed wikitext (useparseddiff=1) or the original unparsed diff (useparseddiff=0).
useparseddiff=1

# NOTE(review): not described in this file; presumably a maximum number of messages - confirm which messages and per what.
maxmessages=5

# NOTE(review): not described in this file; presumably 1 = report one link per line - confirm.
onelinkperline=1

# Do not save reports for every redirect detected.
# NOTE(review): presumably 1 = do not save, 0 = save (the current value) - confirm.
dontsaveredirects=0

# Some wikis are too active and cause crashing of the diffreader.  Therefore, they get their own diffreader.
# A crash of such a diffreader then also results in downtime on these wikis.
# Separate with space
owndiffreader=#en.wikipedia #wikidata.wikipedia

# Some wikis are (extremely) high volume but do not have XLinkBot.  They would still get high priority and
#   slow down the warnings to XLinkBot.
# Wikis listed here will be pushed to the low priority queue (queue3).  Separate with |
lowprioritywikis=#wikidata.wikipedia

# Redirect sites: 'redirectsites' is a list of names, separate with |
# For every name in the list there is a '<name>_rule' and a '<name>_contentrule' regex, and optionally
#   a '<name>_ignorerule' regex.  Every '<name>_contentrule' contains one capturing group.
# NOTE(review): presumably '<name>_rule' is matched against the link, '<name>_contentrule' against the content of the
#   page behind the link (capturing the target of the redirect), and '<name>_ignorerule' excludes links - confirm.
redirectsites=tk|trcx|attf

tk_rule=\.tk\b
tk_contentrule=\<frame\ssrc=\"(.*?)\"
tk_ignorerule=\bdomain\.dot\.tk\b

trcx_rule=\.tr\.cx\b
trcx_contentrule=\<frame name=\"redirectframe\" src=\"((?:https?|ftp|irc|gopher|telnet|nntp|worldwind):\/\/.*?)\"

attf_rule=\.at\.tf\b
attf_contentrule=\bdocument\.location\s+=\s+\"(https?.*?)\";

# NOTE(review): not described in this file; presumably switches (1 = on, 0 = off) - confirm what exactly is checked.
checklocalwikis=1
checklocalgroups=0

# NOTE(review): not described in this file; presumably the counts above which a number is regarded as 'large' - confirm.
largenumberoflinks=5000
largenumberoflinkadditions=2500

# If there are speed troubles, set this to 1 and the bot will report less stats and no XWiki/Local reports (but with a
# significant increase in speed).
noextendedstats=0
# NOTE(review): the meaning of the two limits below is not described in this file - document after checking the bot code.
noextendedstatslimit=500
lowstatlimit=250

# Detect webbugs, adsenses, clickbanks and similar.
detectids=1

# If there are speed troubles, set this to 1 and the bot will report less stats and no XWiki/Local reports (but with a
# significant increase in speed).
# This setting does the same as 'noextendedstats', but then ONLY for pages outside of mainspace.
onlymainspacestats=0
# NOTE(review): the meaning of the two limits below is not described in this file - document after checking the bot code.
onlymainspacestatslimit=2000
lowmainstatlimit=250

# NOTE(review): the three settings below are not described in this file.  Going by the names, setting them to 1 skips
#   the statistics for whitelisted users, and the scanning of whitelisted links and of whitelisted users - confirm.
dontstatwhitelistedusers=1

dontscanwhitelistedlinks=1

dontscanwhitelistedusers=1

# NOTE(review): not described in this file; presumably limits on the edit count of a user, possibly used for
#   the automatic whitelisting of users (autowhitelistusers) - confirm.
usercountmax=3000
usercountmax2=5000
autowhitelistusers=1

# NOTE(review): not described in this file; presumably a limit on the count of a link - confirm.
linkcountmax=2500

# Templates: regexes in the parameters 'template1' .. 'template<numtemplates>'.
# 'numtemplates' matches the number of 'template<N>' parameters defined below.
# NOTE(review): presumably these regexes define what the bot extracts from an edit (template1 matches urls, template4
#   matches things that look like an email address) - confirm.
numtemplates=4
template1=(?:https?|ftp|irc|gopher|telnet|nntp|worldwind):\/\/[^\s\]\[\{\}\\\|^`<>]+
template2=(?<![\w\d-])(?<!isbn[\s])\d{3,5}[\s-]\d{2,3}[\s-]\d{4}(?![\d\w-])
template3=(?<![\w\d-])(?<!isbn[\s])\d{6}[\s-]\d{4}(?![\d\w-])
template4=(?<![^\s:])[^\s\]\[\{\}\\\|^\/`<>@:]+@\w+(?!\.htm)(?:\.\w+){1,3}

# Alerts: regexes in the parameters 'alert1' .. 'alert<numalerts>'.
# alert3 and alert4 are disabled (commented out), 'numalerts' matches the two that are left.
# NOTE(review): presumably 'numalerts' has to be increased when alert3/alert4 are enabled again - confirm.
numalerts=2
alert1=\bproof\.php\b
alert2=(?<![^\s:])[^\s\]\[\{\}\\\|^\/`<>@:]+@\w+(?!\.htm)(?:\.\w+){1,3}
#alert3=(?<![\w\d-])(?<!isbn[\s])(?<!issn[\s\%\(])(?<!\%)\d{3,5}[\s-]\d{2,3}[\s-]\d{4}(?![\d\w-])
#alert4=(?<![\w\d-])(?<!isbn[\s])(?<!issn[\s\%\)])(?<!%)\d{6}[\s-]\d{4}(?![\d\w-])

# Throw an alert when editors add more than XX external links in ONE edit
externallinkoverflow=100

# Ignores: regexes in the parameters 'ignore1' .. 'ignore<numignores>'.
# Note that ignore7 is listed directly after ignore1 (out of numerical order); all seven are defined.
numignores=7
# ignore base wiki links.
ignore1=https?:\/\/(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary|mediawiki)\.org\/[\?w]
# ignore email addresses from mediawiki projects
ignore7=@[A-Za-z\-]+\.(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary)(?!\.)
# ignore the toolserver
ignore2=tools\.wikimedia\.de\b
# ignore mediawiki projects themselves
ignore3=https?:\/\/[A-Za-z\-]+\.(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary|mediawiki)\.org\/[\?w]
# ignore more toolserver
ignore4=\btoolserver\.org\b
# ignore links that are not links
ignore5=^http:\/\/\',$
# ignore some malformed links
ignore6=^http\.$

# Regex for things that do not need to be scanned for adsense/clickbank and such.
# The alternatives in the regex are separated with |
noidsensing=\.pdf\b|\.doc\b|\.jpg\b|\.gif\b

# For the stopforumspam.com queries - reporting when any of these is passing:
# Confidence threshold - only report when 'confidence' is higher than this (value between 0-100).
# Started at 25%, bumped to 50% as the username is least reliable, and the IP address is probably more of a concern.
forumspam_confidencethreshold=50
# Frequency threshold - only report when 'frequency' is higher than this.
# Let's try a minimum of 10.
forumspam_frequencythreshold=10
# Time threshold - only report when the editor was last seen on stopforumspam.com within the last ## seconds - set to 0 for infinite.
# Let's try the last 4 weeks for users (2419200 seconds), 1 week for IPs (604800 seconds).
forumspam_usertimethreshold=2419200
forumspam_iptimethreshold=604800
# Username length threshold - only report when the length of the non-IP username is longer than this.
# Let's try a minimum username length of 3 (so longer than 2), single and double character usernames are too common.
forumspam_usernamelengththreshold=2

# Some on-IRC tags - MAKE SURE TAGS ARE UNIQUE, THEY ARE USED IN THE CODE TO TEST AS WELL
# Every parameter name may also occur only once in this list.
tag_onlymainspacestats=MSO
tag_noextendedstats=NES
tag_notcounted=NC
tag_dontcount=DC
tag_clickbanklink=CB
tag_webbuglink=WB
tag_adsenselink=AS
tag_plimuslink=PL
tag_autoredlist=ARL
tag_redlist=RL
tag_alertlist=AL
tag_redirectlink=REDIR
tag_whitelistedlink=WL
tag_blacklistedlink=BL
tag_resolvedip=RESOLVED
tag_forumspammer=FS
# Tags for HTTP status codes, format of parameter name is 'tag_status<code>'.
# NOTE(review): 'tag_status1XX' and 'tag_status2XX' presumably cover a whole class of status codes - confirm.
# NOTE(review): the labels for 100 and 101 do not match the standard reason phrases (100 = Continue,
#   101 = Switching Protocols).  They are left as they are because tags are also tested in the code - confirm before correcting.
tag_status1XX=STATUS CONTINUE
tag_status100=STATUS 100 OK
tag_status101=STATUS 101 Continue
tag_status2XX=STATUS OK
tag_status200=STATUS 200 (OK)
tag_status201=STATUS 201 (created)
tag_status202=STATUS 202 (accepted)
tag_status203=STATUS 203 (non-authorative information)
tag_status204=STATUS 204 (no content)
tag_status205=STATUS 205 (reset content)
tag_status206=STATUS 206 (partial content)
tag_status300=STATUS 300 (multiple choice)
tag_status301=STATUS 301 (moved permanently)
tag_status302=STATUS 302 (found)
tag_status303=STATUS 303 (see other)
tag_status304=STATUS 304 (not modified)
tag_status305=STATUS 305 (use proxy)
tag_status306=STATUS 306 (not used)
tag_status307=STATUS 307 (temporary redirect)
tag_status400=STATUS 400 (bad request)
tag_status401=STATUS 401 (unauthorized)
tag_status402=STATUS 402 (payment required)
tag_status403=STATUS 403 (forbidden)
tag_status404=STATUS 404 (not found)
tag_status405=STATUS 405 (method not allowed)
tag_status406=STATUS 406 (not acceptable)
tag_status407=STATUS 407 (proxy authentication required)
tag_status408=STATUS 408 (request timeout)
tag_status409=STATUS 409 (conflict)
tag_status410=STATUS 410 (gone)
tag_status411=STATUS 411 (length required)
tag_status412=STATUS 412 (precondition failed)
tag_status413=STATUS 413 (request entity too long)
tag_status414=STATUS 414 (request-URI too long)
tag_status415=STATUS 415 (unsupported media type)
tag_status416=STATUS 416 (requested range not satisfiable)
tag_status417=STATUS 417 (expectation failed)
tag_status500=STATUS 500 (internal server error)
tag_status501=STATUS 501 (not implemented)
tag_status502=STATUS 502 (bad gateway)
tag_status503=STATUS 503 (service unavailable)
tag_status504=STATUS 504 (gateway timeout)
tag_status505=STATUS 505 (HTTP version not supported)
tag_noredirectlink=NOREDIR

# More tags - just for display, no need to be unique (the count and alert tags below share the same values).
tag_veryhighcount=!!
tag_highcount=!
tag_highalert=!!
tag_lowalert=!

# Since we have some bots running around doing A LOT of work, making life for linkwatcher impossible:
# totallyignoredusers is a '|' separated list, where each item is either:
#   a username (which is then globally ignored), or
#   an item 'username@wiki' (username and wiki separated with @) for ignoring the user only on a certain wiki
#   (wikiname is as in the url: 'meta.wikimedia.org', 'www.wikidata.org', 'en.wikipedia.org')
totallyignoredusers=Legobot|Addbot|Sk!dbot|COIBot|GZWDer (flood)@www.wikidata.org|S205643bot@www.wikidata.org|QuickStatementsBot@www.wikidata.org|BotNinja@www.wikidata.org|ProteinBoxBot@www.wikidata.org|JarBot@ar.wikipedia.org
# totallyignoredgroups is a '|' separated list containing items 'right@wiki' (right and wiki separated with @) for
#   ignoring all the users with a certain right on a certain wiki (wikiname as just above for totallyignoredusers).
# Global right ignoring is NOT supported.
totallyignoredgroups=flood@www.wikidata.org|bot@www.wikidata.org|bot@commons.wikimedia.org

# This determines the places (page names) of the on-wiki whitelists and similar lists.
# NOTE(review): this file does not say on which wiki these pages live - confirm.
wikilinkwhitelist=User:LiWa3/linkwhitelist.css
wikiuserwhitelist=User:LiWa3/userwhitelist.css
wikidonotcountdomainlist=User:LiWa3/donotcountlist.css

# These are cloaks of trusted users who can always use all commands of COIBot, separate with |
# NOTE(review): this file holds the settings for LiWa3 - confirm whether 'COIBot' in the line above is intended.
trustedusers=user/beetstra|wikimedia/Versageek|wikipedia/MER-C|wikimedia/Igna|wikipedia/pdpc.professional.kaare|wikimedia/Martin-Urbanec|wikisource/billinghurst|wikipedia/Trijnstel|wikipedia/Shanmugamp7|wikimedia/Glaisher|wikimedia/marcoaurelio|wikimedia/-revi|wikimedia/Zabshk|countervandalism/Syum90|wikipedia/TheresNoTime|wikimedia/JonKolbert|wikipedia/Praxidicae|wikimedia/L235|wikipedia/ZI-Jony|wikipedia/GeneralNotability|wikimedia/ToBeFree|wikipedia/perryprog

#