Difference between revisions of "Site:Developer stuff/Analysis of edits"

From Feast upon the Word (http://feastupontheword.org). Copyright, Feast upon the Word.
Jump to: navigation, search
(adding content)
 
(should have posted this a while ago. just got to it. Latest version of this code. Last used in March.)
 
(4 intermediate revisions by the same user not shown)
Line 1: Line 1:
 +
<pre>
 +
<nowiki>
 
# use these commands to clean up temporary tables created in this code.
 
# use these commands to clean up temporary tables created in this code.
drop table test.commentary_edits
+
drop table test.commentary_edits;
 
drop table test.del1
 
drop table test.del1
 
drop table test.all_scripture_edits
 
drop table test.all_scripture_edits
drop table test.scripture_talk_edits
 
drop table test.cur_scriptures_talk
 
 
drop table test.scripture_edits_page_summary
 
drop table test.scripture_edits_page_summary
 
drop table test.scripture_edits_all_page_summary
 
drop table test.scripture_edits_all_page_summary
Line 10: Line 10:
 
drop table test.scripture_edits_set_100_summary
 
drop table test.scripture_edits_set_100_summary
  
#Get the initial page content for each page
+
#get a list from old of all the edits.
# * I don't want to count the original commentary page content as an edit
+
create table test.del1
+
select a.cur_id, min(old_id) as min_old_id
+
from test.commentary_edits a
+
group by a.cur_id
+
#Get a list of page edits out of old on commentary pages.
+
# * 65257 and 73651 is the range of commentary pages for the scriptures.
+
# * some pages (e.g. BOM title page, Article of Faith pages, Official declaration pages)
+
#   are excluded. I'm not bothering with them for now.
+
# * old doesn't contain the most recent version. It only contains the history.
+
 
Create table test.commentary_edits
 
Create table test.commentary_edits
select a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text, b.old_timestamp,  
+
select a.order_id, b.old_namespace as namespace, b.old_title as title, b.old_id, b.old_user_text as user_text, b.old_timestamp as timestamp, b.old_minor_edit as minor_edit
 +
from test.commentary_pages_ordered a, feastupontheword.old b
 +
where a.cur_title=b.old_title and b.old_namespace in (0,1)
  
old_minor_edit
 
from feastupontheword.cur a, feastupontheword.old b
 
where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title
 
and a.cur_id  between 65257 and 73651
 
 
#get count
 
Select count(*) from test.commentary_edits
 
#2503
 
  
#Get the initial page content for each page
+
#kick out original content on commentary pages.
# * I don't want to count the original commentary page content as an edit
+
#first find the first entry for commentary pages
 +
#we don't do the same for talk pages since they are originally blank.
 
create table test.del1
 
create table test.del1
select a.cur_id, min(old_id) as min_old_id
+
select a.order_id, a.namespace, a.title, min(case when namespace=0 then old_id else 0 end) as min_old_id
 
from test.commentary_edits a
 
from test.commentary_edits a
group by a.cur_id
+
group by a.order_id, a.namespace, a.title
  
#get count
 
Select count(*) from test.del1
 
#797
 
  
#Get the commentary page edits from old excluding the original content
+
#make table without original commentary content
 
create table test.all_scripture_edits
 
create table test.all_scripture_edits
select a.*
+
select a.*, NULL as cur_id
 
from test.commentary_edits a, test.del1 b
 
from test.commentary_edits a, test.del1 b
where a.old_id<>b.min_old_id and a.cur_id=b.cur_id
+
where a.old_id<>b.min_old_id and a.namespace=b.namespace and a.title=b.title
  
#get count
+
#add cur content for commentary pages.
Select count(*) from test.all_scripture_edits
+
#1706
+
 
+
#add cur version of pages into format used in old table for a complete list of changes to
+
#the commentary pages
+
# * Watchout for overlapping id's.
+
#   I'm using cur_id as substitute for old_id.  
+
#   Currently no problem due to different range of values
+
 
insert into test.all_scripture_edits
 
insert into test.all_scripture_edits
select b.cur_id, b.cur_namespace, b.cur_title, b.cur_id as old_id, b.cur_user_text as old_user_text,
+
select a.order_id, b.cur_namespace as namespace, b.cur_title as title, NULL as old_id, b.cur_user_text as user_text, b.cur_timestamp as timestamp, b.cur_minor_edit as minor_edit, b.cur_id
b.cur_timestamp as old_timestamp, b.cur_minor_edit as old_minor_edit
+
 
from test.del1 a, feastupontheword.cur b
 
from test.del1 a, feastupontheword.cur b
where a.cur_id=b.cur_id
+
where a.namespace=0 and b.cur_namespace=0 and a.title=b.cur_title
 
+
#get new count
+
Select count(*) from test.all_scripture_edits
+
#2503
+
 
+
#get a list of talk pages that exist for commentary pages.
+
create table test.cur_scriptures_talk
+
select a.cur_id as commentary_cur_id, b.*
+
from feastupontheword.cur a, feastupontheword.cur b
+
where a.cur_id between 65257 and 73651
+
and b.cur_namespace=1
+
and a.cur_title=b.cur_title
+
 
+
#get count
+
Select count(*) from test.cur_scriptures_talk
+
#217
+
 
+
#get a list of changes on those talk pages from old
+
create table test.scripture_talk_edits
+
select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, b.old_id, b.old_user_text,
+
 
+
b.old_timestamp, old_minor_edit
+
from test.cur_scriptures_talk a, feastupontheword.old b
+
where a.cur_namespace=b.old_namespace and a.cur_title=b.old_title
+
 
+
#get count
+
Select count(*) from test.scripture_talk_edits
+
#407
+
 
+
#add cur versions to that list
+
insert into test.scripture_talk_edits
+
select a.commentary_cur_id, a.cur_id, a.cur_namespace, a.cur_title, a.cur_id as old_id,
+
 
+
a.cur_user_text as old_user_text,
+
a.cur_timestamp as old_timestamp, a.cur_minor_edit as old_minor_edit
+
from test.cur_scriptures_talk a
+
#inserted 217
+
 
+
#get new count
+
Select count(*) from test.scripture_talk_edits
+
#624
+
  
#add talk edits to commentary edits to get one complete list
+
#add cur content for talk pages
# * switching using the commentary_cur_id as the cur_id rather than the talk pages' cur id
+
#   to allow all the edits for a single scripture (commentary or talk) to show up together.
+
# * dropping talk cur_id. Ugly, but it isn't really needed now anyway.
+
 
insert into test.all_scripture_edits
 
insert into test.all_scripture_edits
select a.commentary_cur_id as cur_id, a.cur_namespace, a.cur_title, a.old_id, a.old_user_text,
+
select a.order_id, b.cur_namespace as namespace, b.cur_title as title, NULL as old_id, b.cur_user_text as user_text, b.cur_timestamp as timestamp, b.cur_minor_edit as minor_edit, b.cur_id
a.old_timestamp, a.old_minor_edit
+
from test.commentary_pages_ordered a, feastupontheword.cur b
from test.scripture_talk_edits a
+
where b.cur_namespace=1 and a.cur_title=b.cur_title
  
#get new count
 
Select count(*) from test.all_scripture_edits
 
#3127
 
 
#create a summary page that shows, by page, how many edits (of different types) there have been
 
 
create table test.scripture_edits_page_summary
 
create table test.scripture_edits_page_summary
select cur_id,  
+
select order_id,
 +
title,
 
count(*) as total_edits,  
 
count(*) as total_edits,  
sum(case when cur_namespace=0 then 1 else 0 end) as commentary_edits,  
+
sum(case when namespace=0 then 1 else 0 end) as commentary_edits,  
sum(case when cur_namespace=1 then 1 else 0 end) as talk_edits,
+
sum(case when namespace=1 then 1 else 0 end) as talk_edits,
sum(case when old_minor_edit=1 then 1 else 0 end) as minor_edits
+
sum(case when minor_edit=1 then 1 else 0 end) as minor_edits
 
from test.all_scripture_edits
 
from test.all_scripture_edits
group by cur_id
+
group by order_id, title
 
+
#get count
+
Select count(*) from test.scripture_edits_page_summary
+
#821
+
  
 
#get all the zero pages for a complete list.
 
#get all the zero pages for a complete list.
 
create table test.scripture_edits_all_page_summary
 
create table test.scripture_edits_all_page_summary
select a.cur_id, coalesce(b.total_edits,0) as total_edits,
+
select b.order_id, coalesce(c.total_edits,0) as total_edits,
coalesce(b.commentary_edits,0) as commentary_edits,
+
coalesce(c.commentary_edits,0) as commentary_edits,
coalesce(b.talk_edits,0) as talk_edits,
+
coalesce(c.talk_edits,0) as talk_edits,
coalesce(b.minor_edits,0) as minor_edits
+
coalesce(c.minor_edits,0) as minor_edits
from feastupontheword.cur a left outer join
+
from feastupontheword.cur a inner join test.commentary_pages_ordered b
test.scripture_edits_page_summary b
+
on a.cur_id=b.cur_id
on a.cur_id=b.cur_id
+
left outer join
where a.cur_id  between 65257 and 73651
+
test.scripture_edits_page_summary c
 
+
on b.order_id=c.order_id
#get count
+
Select count(*) from test.scripture_edits_all_page_summary
+
#8395
+
  
 
#further summarize by 100 page increments
 
#further summarize by 100 page increments
 
# * The last set of 100 will actually only have 95 in it.
 
# * The last set of 100 will actually only have 95 in it.
 
create table test.del2
 
create table test.del2
select floor((cur_id-65257)/100) as set_100, min(cur_id) as min_cur_id, count(*) as pages,  
+
select floor(order_id/100) as set_100, min(order_id) as min_order_id,
 
+
count(*) as pages, sum(total_edits) as total_edits,
sum(total_edits) as total_edits,
+
 
sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits,  
 
sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits,  
 
sum(minor_edits) as minor_edits
 
sum(minor_edits) as minor_edits
 
from test.scripture_edits_all_page_summary
 
from test.scripture_edits_all_page_summary
group by floor((cur_id-65257)/100)
+
group by floor(order_id/100)
  
 
#get count
 
#get count
Line 166: Line 83:
 
create table test.scripture_edits_set_100_summary
 
create table test.scripture_edits_set_100_summary
 
select a.*, b.cur_title as start_title
 
select a.*, b.cur_title as start_title
from test.del2 a, feastupontheword.cur b
+
from test.del2 a, test.commentary_pages_ordered b
where a.min_cur_id=b.cur_id
+
where a.min_order_id=b.order_id
  
#get count
+
EXPORT
Select count(*) from test.scripture_edits_set_100_summary
+
#84
+
  
 
# use these commands to clean up temporary tables created in this code.
 
# use these commands to clean up temporary tables created in this code.
drop table test.commentary_edits
+
drop table test.commentary_edits;
 
drop table test.del1
 
drop table test.del1
 
drop table test.all_scripture_edits
 
drop table test.all_scripture_edits
drop table test.scripture_talk_edits
 
drop table test.cur_scriptures_talk
 
 
drop table test.scripture_edits_page_summary
 
drop table test.scripture_edits_page_summary
 
drop table test.scripture_edits_all_page_summary
 
drop table test.scripture_edits_all_page_summary
 
drop table test.del2
 
drop table test.del2
 
drop table test.scripture_edits_set_100_summary
 
drop table test.scripture_edits_set_100_summary
 +
</nowiki></pre>

Latest revision as of 07:30, 29 April 2006


# use these commands to clean up temporary tables created in this code.
drop table test.commentary_edits;
drop table test.del1
drop table test.all_scripture_edits
drop table test.scripture_edits_page_summary
drop table test.scripture_edits_all_page_summary
drop table test.del2
drop table test.scripture_edits_set_100_summary

#get a list from old of all the edits.
Create table test.commentary_edits
select a.order_id, b.old_namespace as namespace, b.old_title as title, b.old_id, b.old_user_text as user_text, b.old_timestamp as timestamp, b.old_minor_edit as minor_edit
from test.commentary_pages_ordered a, feastupontheword.old b
where a.cur_title=b.old_title and b.old_namespace in (0,1)


#kick out original content on commentary pages.
#first find the first entry for commentary pages
#we don't do the same for talk pages since they are originally blank.
create table test.del1
select a.order_id, a.namespace, a.title, min(case when namespace=0 then old_id else 0 end) as min_old_id
from test.commentary_edits a
group by a.order_id, a.namespace, a.title


#make table without original commentary content
create table test.all_scripture_edits
select a.*, NULL as cur_id
from test.commentary_edits a, test.del1 b
where a.old_id<>b.min_old_id and a.namespace=b.namespace and a.title=b.title

#add cur content for commentary pages.
insert into test.all_scripture_edits
select a.order_id, b.cur_namespace as namespace, b.cur_title as title, NULL as old_id, b.cur_user_text as user_text, b.cur_timestamp as timestamp, b.cur_minor_edit as minor_edit, b.cur_id
from test.del1 a, feastupontheword.cur b
where a.namespace=0 and b.cur_namespace=0 and a.title=b.cur_title

#add cur content for talk pages
insert into test.all_scripture_edits
select a.order_id, b.cur_namespace as namespace, b.cur_title as title, NULL as old_id, b.cur_user_text as user_text, b.cur_timestamp as timestamp, b.cur_minor_edit as minor_edit, b.cur_id
from test.commentary_pages_ordered a, feastupontheword.cur b
where b.cur_namespace=1 and a.cur_title=b.cur_title

create table test.scripture_edits_page_summary
select order_id, 
	title,
	count(*) as total_edits, 
	sum(case when namespace=0 then 1 else 0 end) as commentary_edits, 
	sum(case when namespace=1 then 1 else 0 end) as talk_edits,
	sum(case when minor_edit=1 then 1 else 0 end) as minor_edits
from test.all_scripture_edits
group by order_id, title

#get all the zero pages for a complete list.
create table test.scripture_edits_all_page_summary
select b.order_id, coalesce(c.total_edits,0) as total_edits,
coalesce(c.commentary_edits,0) as commentary_edits,
coalesce(c.talk_edits,0) as talk_edits,
coalesce(c.minor_edits,0) as minor_edits
from feastupontheword.cur a inner join test.commentary_pages_ordered b
	on a.cur_id=b.cur_id
left outer join
	test.scripture_edits_page_summary c
on b.order_id=c.order_id

#further summarize by 100 page increments
#	* The last set of 100 will actually only have 95 in it.
create table test.del2
select floor(order_id/100) as set_100, min(order_id) as min_order_id,
count(*) as pages, sum(total_edits) as total_edits,
sum(commentary_edits) as commentary_edits, sum(talk_edits) as talk_edits, 
sum(minor_edits) as minor_edits
from test.scripture_edits_all_page_summary
group by floor(order_id/100)

#get count
Select count(*) from test.del2
#84

#add title of first page for each set of 100
create table test.scripture_edits_set_100_summary
select a.*, b.cur_title as start_title
from test.del2 a, test.commentary_pages_ordered b
where a.min_order_id=b.order_id

EXPORT

# use these commands to clean up temporary tables created in this code.
drop table test.commentary_edits;
drop table test.del1
drop table test.all_scripture_edits
drop table test.scripture_edits_page_summary
drop table test.scripture_edits_all_page_summary
drop table test.del2
drop table test.scripture_edits_set_100_summary