Sunday, June 7, 2015

BCS Models - Part 1: Target Database

This post is part of an eight part series describing the process I have followed to be able create a BCS model capable of indexing well over 10 million items.

Series Index:
BCS Models - Part 1: Target Database <-- You are here
BCS Models - Part 2: Initial BCS External Content Types
BCS Models - Part 3: Crawl Results
BCS Models - Part 4: Bigger Database
BCS Models - Part 5: The Bigger Database
BCS Models - Part 6: How to eat this elephant?
BCS Models - Part 7: Changes to the BCS Model to support segmented crawl
BCS Models - Part 8: Crawl Results
BCS Models - Part 9: Crawl-Time Security 


I'm going to be using a database extract of http://Sharepoint.stackexchange.com that is published quarterly by the fine folks at Stack Exchange to demonstrate the BCS models.  I'm using the March 2015 extract.  The DB Model as created by the import tool looks like:
I got the current archive from Archive.org.  Brent Ozar has a nice post describing getting it into a SQL DB.

I'm going to model this into 7 entities:

Entity  Item Count
Answer  19,217
AnswerComment  28,152
Question  49,249
QuestionComment  37,770
Response  40,546
ResponseComment  40,144
User  24,613

The SQL Objects used to created these entities are below.  Given the nature of the dataset, it doesn't really make sense to implement the incremental crawl but I'm doing it anyway just to illustrate the procedure.

Views:
CREATE VIEW [dbo].[uv_AllQuestions]
AS
SELECT p.ID,
p.ParentId,
p.AcceptedAnswerId,
p.AnswerCount,
p.Body,
p.ClosedDate,
p.CommentCount,
p.CommunityOwnedDate,
p.CreationDate,
p.FavoriteCount,
p.LastActivityDate,
p.LastEditDate,
p.LastEditorDisplayName,
p.Score,
p.Tags,
p.Title,
p.ViewCount,
u.DisplayName,
PT.Type as PostType
  FROM dbo.Posts AS
left outer JOIN Users AS U ON p.OwnerUserId = U.Id 
left outer JOIN PostTypes AS PT ON p.PostTypeId = PT.Id
 WHERE p.ParentId = 0
GO

CREATE VIEW [dbo].[uv_AllQuestionComments]
AS
SELECT c.Id AS CommentID, 
c.CreationDate, 
c.PostId, 
c.Score, 
c.Text, 
c.UserId, 
u.DisplayName
  FROM dbo.Comments c
INNER JOIN dbo.Users u ON u.Id = c.UserId
 where exists (
select 1
  from uv_AllQuestions
where ID = c.PostID
)


GO

CREATE VIEW [dbo].[uv_AllResponses]
AS
SELECT p.ID,
p.ParentId,
p.AnswerCount,
p.Body,
p.ClosedDate,
p.CommentCount,
p.CommunityOwnedDate,
p.CreationDate,
p.FavoriteCount,
p.LastActivityDate,
p.LastEditDate,
p.LastEditorDisplayName,
p.Score,
p.Tags,
p.Title,
p.ViewCount,
u.DisplayName,
PT.Type as PostType
  FROM dbo.Posts AS
left outer JOIN Users AS U ON p.OwnerUserId = U.Id 
left outer JOIN PostTypes AS PT ON p.PostTypeId = PT.Id
 WHERE p.ParentId <> 0
   and Not exists (
select
  from dbo.posts 
where AcceptedAnswerId = p.Id
)
GO

CREATE VIEW [dbo].[uv_AllResponseComments]
AS
SELECT c.Id AS CommentID, 
c.CreationDate, 
c.PostId, 
c.Score, 
c.Text, 
c.UserId, 
u.DisplayName
  FROM dbo.Comments c
INNER JOIN dbo.Users u ON u.Id = c.UserId
 where exists (
select 1
  from uv_AllResponses
where ID = c.PostID
)
GO

CREATE VIEW [dbo].[uv_AllAcceptedAnswers]
AS
SELECT p.ID,
p.ParentId,
p.AnswerCount,
p.Body,
p.ClosedDate,
p.CommentCount,
p.CommunityOwnedDate,
p.CreationDate,
p.FavoriteCount,
p.LastActivityDate,
p.LastEditDate,
p.LastEditorDisplayName,
p.Score,
p.Tags,
p.Title,
p.ViewCount,
u.DisplayName,
PT.Type as PostType
  FROM dbo.Posts AS
left outer JOIN Users AS U ON p.OwnerUserId = U.Id 
left outer JOIN PostTypes AS PT ON p.PostTypeId = PT.Id
 WHERE p.ParentId > 0
   and exists (
select
  from dbo.posts 
where AcceptedAnswerId = p.Id
)
GO

CREATE VIEW [dbo].[uv_AllAnswerComments]
AS
SELECT c.Id AS CommentID, 
c.CreationDate, 
c.PostId, 
c.Score, 
c.Text, 
c.UserId, 
u.DisplayName
  FROM dbo.Comments c
INNER JOIN dbo.Users u ON u.Id = c.UserId
 where exists (
select 1
  from uv_AllAcceptedAnswers aaa
where aaa.ID = c.PostID
)
GO

CREATE VIEW [dbo].[uv_AllComments]
AS
SELECT c.Id AS CommentID, 
c.CreationDate, 
c.PostId, 
c.Score, 
c.Text, 
c.UserId,
u.DisplayName
FROM         dbo.Comments c
INNER JOIN dbo.Users u ON c.UserId = u.Id
GO

CREATE VIEW [dbo].[uv_AllUsers]
AS

SELECT
  u.Id,
  u.AboutMe,
  u.Age,
  u.CreationDate,
  u.DisplayName,
  u.DownVotes,
  u.LastAccessDate,
  u.Location,
  u.Reputation,
  u.UpVotes,
  u.[Views],
  u.WebsiteUrl
FROM Users u

GO


Stored Procedures:
CREATE PROCEDURE [dbo].[usp_getAccpetedAnswersByQuestionID] @QuestionID INT AS
  SELECT aaa.ID,
aaa.ParentId,
aaa.AnswerCount,
aaa.Body,
aaa.ClosedDate,
aaa.CommentCount,
aaa.CommunityOwnedDate,
aaa.CreationDate,
aaa.FavoriteCount,
aaa.LastActivityDate,
aaa.LastEditDate,
aaa.LastEditorDisplayName,
aaa.Score,
aaa.Tags,
aaa.Title,
aaa.ViewCount,
aaa.DisplayName,
aaa.PostType
    FROM uv_AllAcceptedAnswers aaa
   WHERE aaa.parentID = @questionID 


GO

CREATE PROCEDURE [dbo].[usp_getComment] @CommentID INT AS
  SELECT ac.CommentID,
ac.CreationDate,
ac.PostId,
ac.Score,
ac.Text,
ac.UserId,
ac.DisplayName
    FROM dbo.uv_AllComments ac
   WHERE CommentID =@CommentID 
GO

create PROCEDURE [dbo].[usp_getCommentByPostID] @postID INT AS
  SELECT ac.CommentID,
ac.CreationDate,
ac.PostId,
ac.Score,
ac.Text,
ac.UserId,
ac.DisplayName
    FROM dbo.uv_AllComments ac
   WHERE PostID =@postID 

GO

CREATE PROCEDURE [dbo].[usp_getPostByID] @postID INT AS
  SELECT p.ID,
p.ParentId,
p.AcceptedAnswerId,
p.AnswerCount,
p.Body,
p.ClosedDate,
p.CommentCount,
p.CommunityOwnedDate,
p.CreationDate,
p.FavoriteCount,
p.LastActivityDate,
p.LastEditDate,
p.LastEditorDisplayName,
p.Score,
p.Tags,
p.Title,
p.ViewCount,
u.DisplayName,
PT.Type as PostType
    FROM dbo.Posts p 
left outer JOIN Users AS U ON p.OwnerUserId = U.Id 
left outer JOIN PostTypes AS PT ON p.PostTypeId = PT.Id
   WHERE p.ID = @postID 
GO

CREATE PROCEDURE [dbo].[usp_getResponsesByQuestionID] @QuestionID INT AS
  SELECT ar.ID,
ar.ParentId,
ar.AnswerCount,
ar.Body,
ar.ClosedDate,
ar.CommentCount,
ar.CommunityOwnedDate,
ar.CreationDate,
ar.FavoriteCount,
ar.LastActivityDate,
ar.LastEditDate,
ar.LastEditorDisplayName,
ar.Score,
ar.Tags,
ar.Title,
ar.ViewCount,
ar.DisplayName,
ar.PostType
    FROM dbo.uv_AllResponses ar
   WHERE ar.ID =@questionID 

GO

CREATE PROCEDURE [dbo].[usp_getUser] @userID INT AS
  SELECT  ua.Id,
          ua.AboutMe,
          ua.Age,
          ua.CreationDate,
          ua.DisplayName,
          ua.DownVotes,
          ua.LastAccessDate,
          ua.Location,
          ua.Reputation,
          ua.UpVotes,
          ua.Views,
          ua.WebsiteUrl
    FROM dbo.uv_AllUsers ua 
   WHERE Id=@userid 

GO

CREATE PROCEDURE [dbo].[usp_getChangedAnswers] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT aa.ID
    FROM dbo.uv_AllAcceptedAnswers aa 
  WHERE aa.LastEditDate > @LastRunDate
  set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedAnswerComments] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT CommentID
    FROM dbo.uv_AllAnswerComments 
  WHERE CreationDate > @LastRunDate
  set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedComments] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT ac.CommentID
    FROM dbo.uv_AllComments ac
  WHERE ac.creationdate > @LastRunDate
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedQuestionComments] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT CommentID
    FROM dbo.uv_AllQuestionComments 
  WHERE CreationDate > @LastRunDate
  set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedQuestions] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT aq.ID
    FROM dbo.uv_AllQuestions aq 
  WHERE aq.LastEditDate > @LastRunDate
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedResponseComments] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT CommentID
    FROM dbo.uv_AllResponseComments 
  WHERE CreationDate > @LastRunDate
  set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedResponses] @LastRunDate datetime output AS
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT ar.ID
    FROM dbo.uv_Allresponses ar 
  WHERE LastEditDate > @LastRunDate
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getChangedUsers] @LastRunDate datetime output as
  if @LastRunDate != CAST('1900-1-1' as datetime)
  SELECT  ua.Id
    FROM dbo.uv_AllUsers ua 
  WHERE ua.CreationDate > @LastRunDate
set @LastRunDate = CURRENT_TIMESTAMP
GO


CREATE PROCEDURE [dbo].[usp_getDeletedAnswers] @LastRunDate datetime output AS
  SELECT aa.ID
    FROM dbo.uv_AllAcceptedAnswers aa 
   WHERE 0 = 1
  set @LastRunDate = CURRENT_TIMESTAMP
GO

Create PROCEDURE [dbo].[usp_getDeletedAnswerComments] @LastRunDate datetime output AS
  SELECT CommentID
    FROM dbo.uv_AllAnswerComments 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getDeletedComments] @LastRunDate datetime output AS
  SELECT ac.CommentID
    FROM dbo.uv_AllComments ac
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

Create PROCEDURE [dbo].[usp_getDeletedQuestionComments] @LastRunDate datetime output AS
  SELECT CommentID
    FROM dbo.uv_AllQuestionComments 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getDeletedQuestions] @LastRunDate datetime output AS
  SELECT  aq.ID
    FROM dbo.uv_AllQuestions aq 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

Create PROCEDURE [dbo].[usp_getDeletedResponseComments] @LastRunDate datetime output AS
  SELECT CommentID
    FROM dbo.uv_AllResponseComments 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getDeletedResponses] @LastRunDate datetime output AS
  SELECT ar.ID
    FROM dbo.uv_Allresponses ar 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO

CREATE PROCEDURE [dbo].[usp_getDeletedUsers] @LastRunDate datetime output as
  SELECT  ua.Id
    FROM dbo.uv_AllUsers ua 
   WHERE 0 = 1
set @LastRunDate = CURRENT_TIMESTAMP
GO


The next post will describe the BCS Model to use these.


No comments: