# Settings for LiWa3
#
# Syntax is as follows:
# * EVERY line starting with a "#" character is disregarded
# * Every non-blank line is in the format "Setting=value"
#
# NOTE(review): this part of the file was restored to one entry per line from a
# whitespace-collapsed copy. A lone '#' directly in front of a 'key=value' pair was read as
# an empty comment line followed by an ACTIVE setting - confirm against the running bot.
#
# If you add channels to the <rcchannels> (separate channels with a space),
# then you have to command 'LiWa3 restart diffreader' to make the bot listen to the new channels.
# If you change one of the settings below, say '!reset' to implement settings in the bot (certain parts of the bot will have some lag)
ircreportchannel=#cvn-sw-spam
ircbotchannel=#wikipedia-spam-bot-channel
ircalertchannel=#cvn-sw-spam
ircelchannel=#wikimedia-external-links
ircelchannels=#cvn-wp-es|#BeetstraBotChannel|#wikipedia-en-spam|#wikipedia-spam-t|#wikidata-spam|#wikimedia-external-links||
# these are the wikis which have a XLinkBot blacklist, separate with ||
# This is where the local blacklists are on the wiki, format of parameter name is '<wiki>_blacklist', more lists can be
# specified, separate with |
en.wikipedia.org_blacklist=User:XLinkBot/RevertList|User:XLinkBot/RevertReferencesList|User:COIBot/RevertList|User:MER-C/RevertList.css
en.wikipedia.org_overridelist=User:XLinkBot/OverrideList|User:MER-C/OverrideList.css|User:COIBot/OverrideList
en.wikipedia.org_hardoverridelist=User:XLinkBot/HardOverrideList|User:MER-C/HardOverrideList.css|User:COIBot/HardOverrideList
trustedgroups=sysop|bot|bureaucrat|rollbacker|founder|steward|checkuser|editor
watchedspaces=0|2|6|10|14|108|118
# see
# The bot will read all that are mentioned behind 'site url="http://'
# The selection of channels from 'special' needs to be specified here. separate with space
special=#commons.wikimedia #meta.wikimedia #species.wikimedia #strategy.wikimedia #mediawiki.wikipedia #wikidata.wikipedia #outreach.wikipedia #nyc.wikimedia
# reporter sleep time between posting a next report.
# this number has to be tweaked according to the number of reporters active,
# and the number of link-addition edits per minute (type: "LiWa3_1 !info" on IRC).
# Example, 20 reported link additions per minute with 5 Reporters on IRC would each give 4 reports per minute
# or one every 15 seconds. The sleeptime should then be smaller than 15 seconds (so it has also some time to work).
# If it builds up a Queue (number after 'M'), either this time is too long, or one needs an extra reporter.
# Setting this time too short may result in flooding.
linkreportersleeptime=1
#
linkwarnersleeptime=1
#
linkparsersleeptime=1
#
linkanalysersleeptime=1
# Using the parsed wikitext (parseddiff = 1) or the original unparsed diff (parseddiff = 0).
useparseddiff=1
maxmessages=5
onelinkperline=1
# Do not save reports for every redirect detected.
dontsaveredirects=0
#some wikis are too active and cause crashing of the diffreader. Therefore, they get their own. Crashing them also results then in downtime on these wikis.
#Separate with space
owndiffreader=#en.wikipedia #wikidata.wikipedia
#some wikis are (extremely) high volume but are not having XLinkBot. That results in these still getting high priority and slowing down warning to XLinkBot.
#wikis here will be pushed to low priority queue (queue3). Separate with |
lowprioritywikis=#wikidata.wikipedia
redirectsites=tk|trcx|attf
tk_rule=\.tk\b
tk_contentrule=\<frame\ssrc=\"(.*?)\"
tk_ignorerule=\bdomain\.dot\.tk\b
trcx_rule=\.tr\.cx\b
trcx_contentrule=\<frame name=\"redirectframe\" src=\"((?:https?|ftp|irc|gopher|telnet|nntp|worldwind):\/\/.*?)\"
attf_rule=\.at\.tf\b
attf_contentrule=\bdocument\.location\s+=\s+\"(https?.*?)\";
checklocalwikis=1
checklocalgroups=0
largenumberoflinks=5000
largenumberoflinkadditions=2500
# if there are speed troubles, set this to 1 and the bot will report less stats and no XWiki/Local reports (but a significant
# increase in speed)
noextendedstats=0
noextendedstatslimit=500
lowstatlimit=250
# detect webbugs, adsenses and clickbanks and similar
detectids=1
# if there are speed troubles, set this to 1 and the bot will report less stats and no XWiki/Local reports (but a significant
# increase in speed)
# This setting does the same as the above one (noextendedstats) but then ONLY for pages outside of mainspace.
onlymainspacestats=0
onlymainspacestatslimit=2000
lowmainstatlimit=250
#
dontstatwhitelistedusers=1
#
dontscanwhitelistedlinks=1
#
dontscanwhitelistedusers=1
#
usercountmax=3000
usercountmax2=5000
autowhitelistusers=1
#
linkcountmax=2500
numtemplates=4
template1=(?:https?|ftp|irc|gopher|telnet|nntp|worldwind):\/\/[^\s\]\[\{\}\\\|^`<>]+
template2=(?<![\w\d-])(?<!isbn[\s])\d{3,5}[\s-]\d{2,3}[\s-]\d{4}(?![\d\w-])
template3=(?<![\w\d-])(?<!isbn[\s])\d{6}[\s-]\d{4}(?![\d\w-])
template4=(?<![^\s:])[^\s\]\[\{\}\\\|^\/`<>@:]+@\w+(?!\.htm)(?:\.\w+){1,3}
numalerts=2
alert1=\bproof\.php\b
alert2=(?<![^\s:])[^\s\]\[\{\}\\\|^\/`<>@:]+@\w+(?!\.htm)(?:\.\w+){1,3}
#alert3=(?<![\w\d-])(?<!isbn[\s])(?<!issn[\s\%\(])(?<!\%)\d{3,5}[\s-]\d{2,3}[\s-]\d{4}(?![\d\w-])
#alert4=(?<![\w\d-])(?<!isbn[\s])(?<!issn[\s\%\)])(?<!%)\d{6}[\s-]\d{4}(?![\d\w-])
# Throw an alert when editors add more than XX external links in ONE edit
externallinkoverflow=100
numignores=7
# ignore base wiki links.
ignore1=https?:\/\/(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary|mediawiki)\.org\/[\?w]
# ignore email addresses from mediawiki projects
ignore7=@[A-Za-z\-]+\.(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary)(?!\.)
# ignore the toolserver
ignore2=tools\.wikimedia\.de\b
# ignore mediawiki projects themselves
ignore3=https?:\/\/[A-Za-z\-]+\.(?:wiki(?:[mp]edia|books|species|source|versity|news)|wiktionary|mediawiki)\.org\/[\?w]
# ignore more toolserver
ignore4=\btoolserver\.org\b
# ignore links that are not links
ignore5=^http:\/\/\',$
# ignore some malformed links
ignore6=^http\.$
# regex for things that do not need to be scanned for adsense/clickbank and such
noidsensing=\.pdf\b|\.doc\b|\.jpg\b|\.gif\b
#For the queries - reporting when any of these is passing:
#confidence threshold - only report when 'confidence' is higher than this (value between 0-100)
# Let's try 25%
## bump to 50% as the username is least reliable, and probably IP address more of a concern
forumspam_confidencethreshold=50
#frequency threshold - only report when 'frequency' is higher than this
# Let's try minimum of 10
forumspam_frequencythreshold=10
#time threshold - only report when editor was last seen on within the last ## seconds - set to 0 for infinite.
# Let's try last 4 weeks for users, 1 week for IPs
forumspam_usertimethreshold=2419200
forumspam_iptimethreshold=604800
#username length threshold - only report when the length of the non-IP username is longer than this
# Let's try minimum username length of 3, single and double character usernames are too common.
forumspam_usernamelengththreshold=2
# Some on-IRC tags - MAKE SURE TAGS ARE UNIQUE, THEY ARE USED IN THE CODE TO TEST AS WELL
tag_onlymainspacestats=MSO
tag_noextendedstats=NES
tag_notcounted=NC
tag_dontcount=DC
tag_clickbanklink=CB
tag_webbuglink=WB
tag_adsenselink=AS
tag_plimuslink=PL
tag_autoredlist=ARL
tag_redlist=RL
tag_alertlist=AL
tag_redirectlink=REDIR
tag_whitelistedlink=WL
tag_blacklistedlink=BL
tag_resolvedip=RESOLVED
tag_forumspammer=FS
# One tag per HTTP status code; the setting name is 'tag_status' followed by the code.
# NOTE(review): the labels for 100 and 101 differ from the standard HTTP reason phrases
# (100 = Continue, 101 = Switching Protocols); left unchanged because tags are tested in code.
tag_status1XX=STATUS CONTINUE
tag_status100=STATUS 100 OK
tag_status101=STATUS 101 Continue
tag_status2XX=STATUS OK
tag_status200=STATUS 200 (OK)
tag_status201=STATUS 201 (created)
tag_status202=STATUS 202 (accepted)
tag_status203=STATUS 203 (non-authorative information)
tag_status204=STATUS 204 (no content)
tag_status205=STATUS 205 (reset content)
tag_status206=STATUS 206 (partial content)
tag_status300=STATUS 300 (multiple choice)
tag_status301=STATUS 301 (moved permanently)
tag_status302=STATUS 302 (found)
tag_status303=STATUS 303 (see other)
tag_status304=STATUS 304 (not modified)
tag_status305=STATUS 305 (use proxy)
tag_status306=STATUS 306 (not used)
tag_status307=STATUS 307 (temporary redirect)
tag_status400=STATUS 400 (bad request)
tag_status401=STATUS 401 (unauthorized)
tag_status402=STATUS 402 (payment required)
tag_status403=STATUS 403 (forbidden)
tag_status404=STATUS 404 (not found)
tag_status405=STATUS 405 (method not allowed)
tag_status406=STATUS 406 (not acceptable)
tag_status407=STATUS 407 (proxy authentication required)
tag_status408=STATUS 408 (request timeout)
tag_status409=STATUS 409 (conflict)
tag_status410=STATUS 410 (gone)
tag_status411=STATUS 411 (length required)
tag_status412=STATUS 412 (precondition failed)
tag_status413=STATUS 413 (request entity too long)
tag_status414=STATUS 414 (request-URI too long)
tag_status415=STATUS 415 (unsupported media type)
tag_status416=STATUS 416 (requested range not satisfiable)
tag_status417=STATUS 417 (expectation failed)
tag_status500=STATUS 500 (internal server error)
tag_status501=STATUS 501 (not implemented)
tag_status502=STATUS 502 (bad gateway)
tag_status503=STATUS 503 (service unavailable)
tag_status504=STATUS 504 (gateway timeout)
tag_status505=STATUS 505 (HTTP version not supported)
tag_noredirectlink=NOREDIR
# More tags - just for display, no need to be unique
tag_veryhighcount=!!
tag_highcount=!
tag_highalert=!!
tag_lowalert=!
# Since we have some bots running around doing A LOT of work, making life for linkwatcher impossible:
# totallyignoredusers, is a '|' separated list, either containing:
# a username in each item (which is then globally ignored), or
# an item 'username@wiki' (username and wiki separated with @) for ignoring only the user on a certain wiki (wikiname is as in the url, '', '', '')
totallyignoredusers=Legobot|Addbot|Sk!dbot|COIBot|GZWDer (flood)|||||
# totallyignoredgroups, is a '|' separated list containing an item 'right@wiki' (right and wiki separated with @) for ignoring all the with a certain right on a certain wiki (as just above for totallyignoredusers)
# global right ignoring is NOT supported.||
# NOTE(review): no 'totallyignoredgroups=' line is present in this copy of the file - confirm whether one is expected.
#This determines the places for the on-wiki whitelists and similar
wikilinkwhitelist=User:LiWa3/linkwhitelist.css
wikiuserwhitelist=User:LiWa3/userwhitelist.css
wikidonotcountdomainlist=User:LiWa3/donotcountlist.css
# These are cloaks of trusted users who can always use all commands of COIBot:
trustedusers=user/beetstra|wikimedia/Versageek|wikipedia/MER-C|wikimedia/Igna|wikipedia/pdpc.professional.kaare|wikimedia/Martin-Urbanec|wikisource/billinghurst|wikipedia/Trijnstel|wikipedia/Shanmugamp7|wikimedia/Glaisher|wikimedia/marcoaurelio|wikimedia/-revi|wikimedia/Zabshk|countervandalism/Syum90|wikipedia/TheresNoTime|wikimedia/JonKolbert|wikipedia/Praxidicae|wikimedia/L235|wikipedia/ZI-Jony|wikipedia/GeneralNotability|wikimedia/ToBeFree|wikipedia/perryprog
#domains for which we do not store webbugs and similar (regex on sortable domainname, i.e. the order of each part between the dots has been reverted, split multiple by |).
notracingstorage=archive\.org|web\.archive\.org
#