Research talk:Revision scoring as a service/Work log/2015-05-16
Latest comment: 9 years ago by EpochFail in topic Saturday, May 16, 2015
Saturday, May 16, 2015
editToday I am filling in automatically generated labels for privileged users.
Here's the query I am using to get the priv. user edits:
SELECT DISTINCT "enwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN enwiki.revision USING(rev_id)
INNER JOIN enwiki.user_groups ON ug_user = rev_user
WHERE wiki = "enwiki" AND
ug_group IN ("oversight", "bot", "rollbacker", "checkuser", "abusefilter", "reviewer",
"sysop", "templateeditor", "autoreviewer", "ipblock-exempt", "filemover",
"import", "accountcreator", "bureaucrat", "eliminator")
UNION ALL
SELECT DISTINCT "ptwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN ptwiki.revision USING(rev_id)
INNER JOIN ptwiki.user_groups ON ug_user = rev_user
WHERE wiki = "ptwiki" AND
ug_group IN ("bot", "autoreviewer", "sysop", "bureaucrat", "rollbacker", "checkuser",
"eliminator", "oversight")
UNION ALL
SELECT DISTINCT "trwiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN trwiki.revision USING(rev_id)
INNER JOIN trwiki.user_groups ON ug_user = rev_user
WHERE wiki = "trwiki" AND
ug_group IN ("bot", "autoreviewer", "sysop", "bureaucrat", "rollbacker", "checkuser",
"eliminator", "oversight")
UNION ALL
SELECT DISTINCT "fawiki" AS wiki, rev_id
FROM staging.rev_ids_20k_sample
INNER JOIN trwiki.revision USING(rev_id)
INNER JOIN trwiki.user_groups ON ug_user = rev_user
WHERE wiki = "fawiki" AND
ug_group IN ("templateeditor", "OTRS-member", "sysop", "rollbacker", "bureaucrat",
"Image-reviewer", "autopatrol", "patroller", "botadmin", "abusefilter",
"bot", "eliminator");
Now to import the data to the labels DB and perform the auto-labeling. I'll be using my test user User:Wiki labeler to do the work.
wikilabels=> CREATE TEMPORARY TABLE rev_ids_to_filter (rev_id INT, wiki VARCHAR(50));
CREATE TABLE
wikilabels=> \copy rev_ids_to_filter FROM './rev_ids.20k_sample.filtered.tsv';
wikilabels=> select count(*) FROM rev_ids_to_filter;
count
-------
43078
(1 row)
wikilabels=> select * from rev_ids_to_filter limit 2;
rev_id | wiki
-----------+--------
604245449 | enwiki
604251853 | enwiki
(2 rows)
wikilabels=> \d workset
Table "public.workset"
Column | Type | Modifiers
-------------+-----------------------------+------------------------------------------------------
id | integer | not null default nextval('workset_id_seq'::regclass)
campaign_id | integer |
user_id | integer |
created | timestamp without time zone |
expires | timestamp without time zone |
Indexes:
"workset_pkey" PRIMARY KEY, btree (id)
"workset_user" btree (user_id)
wikilabels=> select id, wiki, name from campaign where active;
id | wiki | name
----+--------+-------------------------------------------------------
4 | enwiki | Edit quality (20k random sample, 2015)
8 | azwiki | Edit quality (20k random sample, 2015)
5 | trwiki | Değişiklik kalitesi (20,000 rastgele örnekleme, 2015)
6 | fawiki | کیفیت ویرایش (نمونه تصادفی ۲۰ هزارتایی، ۲۰۱۵)
7 | ptwiki | Qualidade das edições (amostra de 20k revisões, 2015)
(5 rows)
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (4, 41948920, NOW(), NOW()) RETURNING id;
id
----
83
(1 row)
INSERT 0 1
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (5, 41948920, NOW(), NOW()) RETURNING id;
id
----
84
(1 row)
INSERT 0 1
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (6, 41948920, NOW(), NOW()) RETURNING id;
id
----
85
(1 row)
INSERT 0 1
wikilabels=> INSERT INTO workset (campaign_id, user_id, created, expires) VALUES (7, 41948920, NOW(), NOW()) RETURNING id;
id
----
86
(1 row)
INSERT 0 1
wikilabels=> \d workset_task
Table "public.workset_task"
Column | Type | Modifiers
------------+---------+-----------
workset_id | integer | not null
task_id | integer | not null
Indexes:
"workset_task_pkey" PRIMARY KEY, btree (workset_id, task_id)
Foreign-key constraints:
"workset_task_task_id_fkey" FOREIGN KEY (task_id) REFERENCES task(id)
wikilabels=> SELECT 83, task.id FROM task WHERE campaign_id = 4 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'enwiki' LIMIT 10);
?column? | id
----------+--------
83 | 180109
83 | 184846
83 | 186126
83 | 190804
83 | 192186
83 | 192371
83 | 193597
83 | 195389
83 | 198193
83 | 198394
(10 rows)
wikilabels=> INSERT INTO workset_task SELECT 83, task.id FROM task WHERE campaign_id = 4 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'enwiki');
INSERT 0 9557
wikilabels=> INSERT INTO workset_task SELECT 84, task.id FROM task WHERE campaign_id = 5 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'trwiki');
INSERT 0 5470
wikilabels=> INSERT INTO workset_task SELECT 85, task.id FROM task WHERE campaign_id = 6 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'fawiki');
INSERT 0 17485
wikilabels=> INSERT INTO workset_task SELECT 86, task.id FROM task WHERE campaign_id = 7 AND (task.data->'rev_id')::text::int IN (SELECT rev_id FROM rev_ids_to_filter WHERE wiki = 'ptwiki');
INSERT 0 10566
wikilabels=> \d label
Table "public.label"
Column | Type | Modifiers
-----------+-----------------------------+-----------
task_id | integer | not null
user_id | integer | not null
timestamp | timestamp without time zone |
data | json |
Indexes:
"label_pkey" PRIMARY KEY, btree (task_id, user_id)
"label_user" btree (user_id)
^
wikilabels=> SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86) limit 2;
task_id | user_id | now | json
---------+----------+-------------------------------+--------------------------------------------------------------------
180077 | 41948920 | 2015-05-16 16:16:31.889927+00 | {"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}
180079 | 41948920 | 2015-05-16 16:16:31.889927+00 | {"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}
(2 rows)
wikilabels=> SELECT COUNT(*) FROM (SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86)) AS foo;
count
-------
43078
(1 row)
wikilabels=> INSERT INTO label SELECT task_id, user_id, NOW(), '{"damaging":false,"goodfaith":true,"unsure":true,"automatic":true}'::json FROM workset INNER JOIN workset_task ON workset.id = workset_id WHERE workset.id IN (83, 84, 85, 86);
INSERT 0 43078
Done --EpochFail (talk) 16:22, 16 May 2015 (UTC)