User:Millbot-Stats/genstats.py
Note that this program is licensed under the AGPL, not the GFDL!
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Millbot-Stats, v. 1.1. A bot for generating statistics at MediaWiki sites.
# Copyright (C) 2008 Milos Rancic <millosh@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import time
import pickle
import stats
mydir = "./"
sys.path.append(mydir)
sys.path.append(mydir + "pywikipedia/")
from os.path import *
from wikipedia import *
#languages['ar'] = {
# 'name': "Arabic",
# 'full name': "Arabic language",
# }
#projects['wikipedia'] = {
# 'baseurl': "wikipedia.org",
# 'lang addition type': "prefix",
# 'suffix': "/wiki/",
# 'langs': [ 'ar', '...', ],
# 'template': 'Template:Wikipedia statistics',
# }
# Load the project and language tables (see the commented examples above for
# the expected shape of each entry).
# NOTE(review): unpickling and execfile() both execute whatever the file
# contains, so these three files must stay trusted, locally maintained
# configuration -- never point the paths at data from an outside source.
# The pickles are resolved against `mydir`, like the translations file, and
# are opened in binary mode and closed again as soon as they are read.
with open(mydir + "projects.pickle", "rb") as pickle_file:
    projects = pickle.load(pickle_file)
with open(mydir + "languages.pickle", "rb") as pickle_file:
    languages = pickle.load(pickle_file)

# translations.conf.py is executed in this namespace; the loop below assumes
# it defines the module-level dict `translations` -- TODO confirm.
transfile = mydir + "translations.conf.py"
execfile(transfile)

# Make sure every language has a translation table, and that each table has
# an entry for every language code and every language name.  Missing entries
# fall back to the names stored in the `languages` table itself.
for plang in languages:
    known = translations.setdefault(plang, {})
    for slang in languages:
        info = languages[slang]
        # 'full name' is only looked up when it is actually needed.
        if slang not in known:
            known[slang] = info['full name']
        known.setdefault(info['name'], info['name'])
# ---------------------------------------------------------------------------
# Collect the raw statistics of every language edition of every project
# family.  The results are left in four module-level dicts keyed by family:
#   pr_dictwikis[fam]  sort index -> per-wiki record
#   pr_listwikis[fam]  the sort indices, largest 'good' count first
#   pr_totals[fam]     the counters summed over all wikis of the family
#   truefalse[fam]     language code -> "true"/"false" (usable stats fetched?)
# ---------------------------------------------------------------------------
datadir = mydir + "data/"
# To try the script on a single family, narrow `projects` here, e.g.
# projects = {'wikiversity': projects['wikiversity']}

# Counters read from the Special:Statistics?action=raw reply.
_STAT_FIELDS = ('good', 'total', 'edits', 'admins', 'users', 'images')


def _stats_url(project, lang):
    """Return the raw-statistics URL of *lang* in *project*.

    Returns None when the project's 'lang addition type' is not one this
    script knows how to turn into an address.
    """
    if project['lang addition type'] == 'prefix':
        return ("http://" + lang + "." + project['baseurl']
                + project['suffix'] + "Special:Statistics?action=raw")
    # Define your own address types here.
    return None


def _download(url, path):
    """Save *url* to *path* with wget and return the saved text, stripped."""
    import subprocess  # only needed here; keeps the file-level imports as-is

    # An argument list avoids the shell, so the '?' in the URL and unusual
    # characters in the path are passed to wget literally.
    subprocess.call(["wget", "-O", path, url])
    with open(path) as raw_file:
        return raw_file.read().strip()


def _parse_raw_stats(row):
    """Extract the counters named in _STAT_FIELDS from a raw reply.

    *row* is a ';'-separated list of key=value pairs.  Returns a dict that
    maps each field to its value as a digit string, or None when a field is
    missing or not a plain number (empty reply, error page, changed format).
    """
    pairs = dict(item.split("=", 1) for item in row.split(";") if "=" in item)
    counters = {}
    for field in _STAT_FIELDS:
        value = pairs.get(field, "")
        if not value.isdigit():
            return None
        counters[field] = value
    return counters


pr_dictwikis = {}
pr_totals = {}
pr_listwikis = {}
truefalse = {}

# One clock reading for the whole run: every file of this run lands in the
# same day directory and carries the same HH-MM stamp, even if the run
# crosses a minute or midnight boundary.  `daydir` is reused by the
# publishing loop further down, so it must not change between families.
run_started = time.localtime()
oyear = time.strftime("%Y", run_started)
omont = time.strftime("%m", run_started)
odate = time.strftime("%d", run_started)
ohour = time.strftime("%H", run_started)
ominu = time.strftime("%M", run_started)
daydir = datadir + oyear + "/" + omont + "/" + odate + "/"

for fam in projects:
    project = projects[fam]
    fam_status = truefalse.setdefault(fam, {})
    listwikis = []
    dictwikis = {}
    sums = dict.fromkeys(_STAT_FIELDS, 0)

    for lang in project['langs']:
        url = _stats_url(project, lang)
        if url is None:
            # Without an address there is nothing to fetch; report it
            # instead of reusing the URL of the previous wiki.
            sys.stderr.write("Unknown 'lang addition type' for %s; "
                             "skipping %s\n" % (fam, lang))
            fam_status[lang] = "false"
            continue

        odir = daydir + fam + "/" + lang + "/"
        if not os.path.isdir(odir):
            os.makedirs(odir)
        fd = odir + "raw-stats-" + ohour + "-" + ominu + ".txt"

        counters = _parse_raw_stats(_download(url, fd))
        if counters is None:
            # Empty or unparsable reply: mark this wiki as failed and go on
            # with the others rather than aborting the whole run.
            fam_status[lang] = "false"
            continue

        # Records are keyed by the article count as a float so that sorting
        # the keys ranks the wikis; ties are broken by nudging the key down
        # until it is unused.
        index = float(counters['good'])
        while index in dictwikis:
            index -= 0.001
        listwikis.append(index)

        # The counters are stored as strings, exactly as they were read.
        record = {
            'true': 'true',
            'code': lang,
            'time': time.strftime("%Y-%m-%d %H:%M:%S"),
        }
        record.update(counters)
        dictwikis[index] = record

        for field in _STAT_FIELDS:
            sums[field] += int(counters[field])
        fam_status[lang] = "true"

    # Largest wiki first.
    listwikis.sort(reverse=True)

    pr_dictwikis[fam] = dictwikis
    pr_totals[fam] = {
        'totalgood': sums['good'],
        'totaltotal': sums['total'],
        'totaledits': sums['edits'],
        'totaladmins': sums['admins'],
        'totalusers': sums['users'],
        'totalimages': sums['images'],
        'totaltime': time.strftime("%Y-%m-%d %H:%M:%S"),
    }
    pr_listwikis[fam] = listwikis
# Wikis that receive a statistics page, keyed first by project family and
# then by language code.  'projects' lists the families whose statistics are
# generated for that wiki.  A family mapped to an empty dict has no target
# wiki configured.
stats_sites = {
    'wikipedia': {
        'sr': {
            'projects': [
                'wikipedia', 'wiktionary', 'wikibooks', 'wikinews',
                'wikisource', 'wikiversity', 'wikiquote',
            ],
        },
    },
    'wiktionary': {},
    'wikibooks': {},
    'wikinews': {},
    'wikisource': {},
    'wikiversity': {},
    'wikiquote': {},
}
# Generate the statistics page of every configured target wiki and upload it.
# `st` is the family and `s` the language code of the wiki that receives the
# pages; `fam` is the family whose collected statistics are being published.
# NOTE: `daydir` is the day directory set by the collection code above.
for st in stats_sites:
    sites = stats_sites[st]
    for s in sites:
        # Resolve the target wiki from both language and family, and hand
        # that site object to Page below.  Passing only the language code
        # would leave the family `st` out of the lookup.
        site = getSite(s, st)
        for fam in sites[s]['projects']:
            outfile = daydir + fam + "/stats-" + '-for-' + st + "-" + s + ".txt"
            # stats.engine is expected to write the page text to `outfile`
            # (it is read back right below) -- see stats.py for details.
            stats.engine(projects[st]['baseurl'], mydir, outfile, s,
                         pr_listwikis[fam], pr_dictwikis[fam],
                         translations, pr_totals[fam], sites)
            with open(outfile, "rb") as rendered:
                content = rendered.read().decode('utf-8')

            # Page title and edit summary come from the translation table of
            # the target language.
            t = translations[s][projects[fam]['template']]
            c = translations[s]['Bot: Updating statistics']
            print(t)
            page = Page(site, t.decode('utf-8'))
            page.put(content, comment=c.decode('utf-8'))