Difference between revisions of "Site:Developer stuff/Analysis of edits"
From Feast upon the Word (http://feastupontheword.org). Copyright, Feast upon the Word.
m (removing duplicate code) |
m (small formatting changes) |
||
| Line 18: | Line 18: | ||
# * old doesn't contain the most recent version. It only contains the history. | # * old doesn't contain the most recent version. It only contains the history. | ||
Create table test.commentary_edits | Create table test.commentary_edits | ||
| − | select a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, b.old_timestamp, | + | select a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, |
| − | + | b.old_timestamp, b.old_minor_edit | |
| − | old_minor_edit | + | |
from feastupontheword.cur a, feastupontheword.old b | from feastupontheword.cur a, feastupontheword.old b | ||
where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title | where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title | ||
| Line 56: | Line 55: | ||
# Currently no problem due to different range of values | # Currently no problem due to different range of values | ||
insert into test.all_scripture_edits | insert into test.all_scripture_edits | ||
| − | select b.cur_id, b.cur_namespace, b.cur_title, b.cur_id as old_id, b.cur_user_text as old_user_text, | + | select b.cur_id, b.cur_namespace, b.cur_title, b.cur_id as old_id, |
| − | b.cur_timestamp as old_timestamp, b.cur_minor_edit as old_minor_edit | + | b.cur_user_text as old_user_text, b.cur_timestamp as old_timestamp, |
| + | b.cur_minor_edit as old_minor_edit | ||
from test.del1 a, feastupontheword.cur b | from test.del1 a, feastupontheword.cur b | ||
where a.cur_id=b.cur_id | where a.cur_id=b.cur_id | ||
| Line 79: | Line 79: | ||
#get a list of changes on those talk pages from old | #get a list of changes on those talk pages from old | ||
create table test.scripture_talk_edits | create table test.scripture_talk_edits | ||
| − | select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, | + | select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, b.old_id, |
| − | + | b.old_user_text, b.old_timestamp, old_minor_edit | |
| − | b.old_timestamp, old_minor_edit | + | |
from test.cur_scriptures_talk a, feastupontheword.old b | from test.cur_scriptures_talk a, feastupontheword.old b | ||
where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title | where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title | ||
| Line 91: | Line 90: | ||
#add cur versions to that list | #add cur versions to that list | ||
insert into test.scripture_talk_edits | insert into test.scripture_talk_edits | ||
| − | select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, a.cur_id as old_id, | + | select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, |
| − | + | a.cur_id as old_id, a.cur_user_text as old_user_text, | |
| − | a.cur_user_text as old_user_text, | + | |
a.cur_timestamp as old_timestamp, a.cur_minor_edit as old_minor_edit | a.cur_timestamp as old_timestamp, a.cur_minor_edit as old_minor_edit | ||
from test.cur_scriptures_talk a | from test.cur_scriptures_talk a | ||
| Line 147: | Line 145: | ||
# * The last set of 100 will actually only have 95 in it. | # * The last set of 100 will actually only have 95 in it. | ||
create table test.del2 | create table test.del2 | ||
| − | select floor((cur_id-65257)/100) as set_100, min(cur_id) as min_cur_id, count(*) as pages, | + | select floor((cur_id-65257)/100) as set_100, min(cur_id) as min_cur_id, |
| − | + | count(*) as pages, sum(total_edits) as total_edits, | |
| − | sum(total_edits) as total_edits, | + | |
sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits, | sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits, | ||
sum(minor_edits) as minor_edits | sum(minor_edits) as minor_edits | ||
Revision as of 22:33, 26 November 2005
<nowiki> # use these commands to clean up temporary tables created in this code. drop table test.commentary_edits drop table test.del1 drop table test.all_scripture_edits drop table test.scripture_talk_edits drop table test.cur_scriptures_talk drop table test.scripture_edits_page_summary drop table test.scripture_edits_all_page_summary drop table test.del2 drop table test.scripture_edits_set_100_summary #Get a list of page edits out of old on commentary pages. # * 65257 and 73651 is the range of commentary pages for the scriptures. # * some pages (e.g. BOM title page, Article of Faith pages, Official declaration pages) # are excluded. I'm not bothering with them for now. # * old doesn't contain the most recent version. It only contains the history. Create table test.commentary_edits select a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, b.old_timestamp, b.old_minor_edit from feastupontheword.cur a, feastupontheword.old b where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title and a.cur_id between 65257 and 73651 #get count Select count(*) from test.commentary_edits #2503 #Get the initial page content for each page # * I don't want to count the original commentary page content as an edit create table test.del1 select a.cur_id, min(old_id) as min_old_id from test.commentary_edits a group by a.cur_id #get count Select count(*) from test.del1 #797 #Get the commentary page edits from old excluding the original content create table test.all_scripture_edits select a.* from test.commentary_edits a, test.del1 b where a.old_id<>b.min_old_id and a.cur_id=b.cur_id #get count Select count(*) from test.all_scripture_edits #1706 #add cur version of pages into format used in old table for a complete list of changes to #the commentary pages # * Watchout for overlapping id's. # I'm using cur_id as substitute for old_id. # Currently no problem due to different range of values insert into test.all_scripture_edits select b.cur_id, b.cur_namespace, b.cur_title, b.cur_id as old_id, b.cur_user_text as old_user_text, b.cur_timestamp as old_timestamp, b.cur_minor_edit as old_minor_edit from test.del1 a, feastupontheword.cur b where a.cur_id=b.cur_id #get new count Select count(*) from test.all_scripture_edits #2503 #get a list of talk pages that exist for commentary pages. create table test.cur_scriptures_talk select a.cur_id as commentary_cur_id, b.* from feastupontheword.cur a, feastupontheword.cur b where a.cur_id between 65257 and 73651 and b.cur_namespace=1 and a.cur_title=b.cur_title #get count Select count(*) from test.cur_scriptures_talk #217 #get a list of changes on those talk pages from old create table test.scripture_talk_edits select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, b.old_timestamp, old_minor_edit from test.cur_scriptures_talk a, feastupontheword.old b where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title #get count Select count(*) from test.scripture_talk_edits #407 #add cur versions to that list insert into test.scripture_talk_edits select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, a.cur_id as old_id, a.cur_user_text as old_user_text, a.cur_timestamp as old_timestamp, a.cur_minor_edit as old_minor_edit from test.cur_scriptures_talk a #inserted 217 #get new count Select count(*) from test.scripture_talk_edits #624 #add talk edits to commentary edits to get one complete list # * switching using the commentary_cur_id as the cur_id rather than the talk pages' cur id # to allow all the edits for a single scripture (commentary or talk) to show up together. # * dropping talk cur_id. Ugly, but it isn't really needed now anyway. insert into test.all_scripture_edits select a.commentary_cur_id as cur_id, a.cur_namespace, a.cur_title, a.old_id, a.old_user_text, a.old_timestamp, a.old_minor_edit from test.scripture_talk_edits a #get new count Select count(*) from test.all_scripture_edits #3127 #create a summary page that shows, by page, how many edits (of different types) there have been create table test.scripture_edits_page_summary select cur_id, count(*) as total_edits, sum(case when cur_namespace=0 then 1 else 0 end) as commentary_edits, sum(case when cur_namespace=1 then 1 else 0 end) as talk_edits, sum(case when old_minor_edit=1 then 1 else 0 end) as minor_edits from test.all_scripture_edits group by cur_id #get count Select count(*) from test.scripture_edits_page_summary #821 #get all the zero pages for a complete list. create table test.scripture_edits_all_page_summary select a.cur_id, coalesce(b.total_edits,0) as total_edits, coalesce(b.commentary_edits,0) as commentary_edits, coalesce(b.talk_edits,0) as talk_edits, coalesce(b.minor_edits,0) as minor_edits from feastupontheword.cur a left outer join test.scripture_edits_page_summary b on a.cur_id=b.cur_id where a.cur_id between 65257 and 73651 #get count Select count(*) from test.scripture_edits_all_page_summary #8395 #further summarize by 100 page increments # * The last set of 100 will actually only have 95 in it. create table test.del2 select floor((cur_id-65257)/100) as set_100, min(cur_id) as min_cur_id, count(*) as pages, sum(total_edits) as total_edits, sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits, sum(minor_edits) as minor_edits from test.scripture_edits_all_page_summary group by floor((cur_id-65257)/100) #get count Select count(*) from test.del2 #84 #add title of first page for each set of 100 create table test.scripture_edits_set_100_summary select a.*, b.cur_title as start_title from test.del2 a, feastupontheword.cur b where a.min_cur_id=b.cur_id #get count Select count(*) from test.scripture_edits_set_100_summary #84 # use these commands to clean up temporary tables created in this code. drop table test.commentary_edits drop table test.del1 drop table test.all_scripture_edits drop table test.scripture_talk_edits drop table test.cur_scriptures_talk drop table test.scripture_edits_page_summary drop table test.scripture_edits_all_page_summary drop table test.del2 drop table test.scripture_edits_set_100_summary
</nowiki></pre>