Architecture Tradeoffs

From semantic-mediawiki.org

In SMW there are architecutral tradeoffs. This is a page intended to start a discussion on these.

Example[edit]

SMW Triples in SQLStore3[edit]

The SQLStore3 consists of multiple tables like

  • smw_di_blob
  • smw_di_number
  • smw_di_time
  • ...
  • smw_object_ids

essentially there is a table per SMW Type and a main table smw_object_ids to link things together.

The decision for this Database design was probably influenced by the need for better performance. The trade off here is less maintainability and less understandability.

This design change was done a few years ago in a separate task.

There would be the possibility to have a simple SQL Query that delivers:

  • id
  • subject
  • predicate
  • object
  • type
  • namespace

which might be helpful in a quite a few usecase. There are pro's and cons for the approaches. E.g. a View with the 5 items above might violate the encapsulation principle which might be harmful.

The id column e.g. might be useful to solve https://github.com/SemanticMediaWiki/SemanticMediaWiki/issues/985

Proposal for a View that makes SMW Triples available for inspection/debugging[edit]

#
# Copyright(c) 2015 BITPlan GmbH
# 
# http://www.profiwiki.de
#
# Author: Wolfgang Fahl
# 
# Access TO Semantic Mediawiki TripleStore
# 2015-06-10
#   
#  FUNCTION SPLIT_STR
#  FUNCTION NAMESPACE
#  VIEW SMW_TRIPLES
#  VIEW SMW_TRIPLES_NS
#
# SPLIT_STR
#   see http://blog.fedecarg.com/2009/02/22/mysql-split-string-FUNCTION/
#
DROP FUNCTION IF EXISTS SPLIT_STR;
CREATE FUNCTION SPLIT_STR(
  x VARCHAR(255),
  delim VARCHAR(12),
  pos INT
)
RETURNS VARCHAR(255)
DETERMINISTIC
NO SQL
SQL SECURITY INVOKER
RETURN REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos),
       LENGTH(SUBSTRING_INDEX(x, delim, pos -1)) + 1),
       delim, '');
#
# NAMESPACE
#   see http://www.mediawiki.org/wiki/Manual:Namespace#Built-in_namespaces
#
DROP FUNCTION IF EXISTS NAMESPACE;
CREATE FUNCTION namespace (ns INT)
  RETURNS VARCHAR(512)
  LANGUAGE SQL
  DETERMINISTIC
  CONTAINS SQL
  SQL SECURITY INVOKER
  COMMENT 'Mediawiki NameSpace number to Namespace name conversion '
RETURN CASE ns 
    WHEN -2 THEN 'Media:'
    WHEN -1 THEN 'Special:'
    WHEN 0 THEN ''
    WHEN 1 THEN 'Talk:'
    WHEN 2 THEN 'User:'
    WHEN 3 THEN 'User talk:'
    WHEN 4 THEN 'Project:'
    WHEN 5 THEN 'Project talk:'
    WHEN 6 THEN 'File:'
    WHEN 7 THEN 'File talk:'
    WHEN 8 THEN 'Mediawiki:'
    WHEN 9 THEN 'Mediawiki talk:'
    WHEN 10 THEN 'Template:'
    WHEN 11 THEN 'Template talk:'
    WHEN 12 THEN 'Help:'
    WHEN 13 THEN 'Help talk:'
    WHEN 14 THEN 'Category:'
    WHEN 15 THEN 'Category talk:'
    WHEN 102 THEN 'Property:' 
    WHEN 106 THEN 'Form:'
    WHEN 108 THEN 'Concept:' 
    ELSE ''
    END;
# 
# SMW TRIPlES
#
# creates a VIEW based ON the SQLStore3 TABLES TO simplify the access TO the triple store
# which has been complicated BY the SQLStore3 redesign
# the performance OF the VIEW might be slow FOR big wikis (e.g. more than 1 million triples)
# see https://semantic-mediawiki.org/wiki/Database_tables
# see https://semantic-mediawiki.org/wiki/Help:SQLStore
# see https://semantic-mediawiki.org/wiki/SQLStore_update
# see https://semantic-mediawiki.org/wiki/SMWCon_Fall_2012/Improvements_in_SQLStore3
#
# 
CREATE OR REPLACE 
#   
#
# DEFAULT SQL security IS DEFINER which IS trouble SOME WHEN VIEW IS backed up AND restored ON
# a host WHERE the defining USER IS NOT available
#
# see:
# http://stackoverflow.com/questions/10169960/mysql-error-1449-the-user-specified-as-a-definer-does-not-exist
# USE different security setting
SQL SECURITY INVOKER
# 
VIEW SMW_TRIPLES AS
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    CAST(sdb.o_blob AS CHAR) AS object,
    'blob' AS TYPE
  FROM 
    smw_di_blob sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id 
  WHERE 
    sdb.o_blob IS NOT NULL
UNION
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    CAST(sdb.o_hash AS CHAR) AS object,
    'shortblob' AS TYPE
  FROM 
    smw_di_blob sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id 
  WHERE 
    sdb.o_blob IS NULL
UNION
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    IF (sdb.o_value,'true','false') AS object,
    'bool' AS TYPE
  FROM 
    smw_di_bool sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id  
UNION
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    sdb.o_sortkey AS object,
    'number' AS TYPE
  FROM 
    smw_di_number sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION 
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    CONCAT(
       split_str(CAST(sdb.o_serialized AS CHAR),'/',2),'-',
       split_str(CAST(sdb.o_serialized AS CHAR),'/',3),'-',
       split_str(CAST(sdb.o_serialized AS CHAR),'/',4)
       ) AS object,
    'time' AS TYPE
  FROM 
    smw_di_time sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    CAST(sdb.o_serialized AS CHAR) AS object,
    'uri' AS TYPE
  FROM 
    smw_di_uri sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION
  SELECT 
    CAST(sois.smw_title AS CHAR) AS subject,
    CAST(soip.smw_title AS CHAR) AS predicate,
    CAST(soio.smw_title AS CHAR) AS object,
    'page' AS TYPE
  FROM 
    smw_di_wikipage sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
    JOIN smw_object_ids soio ON soio.smw_id=sdb.o_id;
#
# SMW Triples WITH Namespaces
#
#    
CREATE OR REPLACE 
#   
#
# DEFAULT SQL security IS DEFINER which IS trouble SOME WHEN VIEW IS backed up AND restored ON
# a host WHERE the defining USER IS NOT available
#
# see:
# http://stackoverflow.com/questions/10169960/mysql-error-1449-the-user-specified-as-a-definer-does-not-exist
# USE different security setting
SQL SECURITY INVOKER
# 
VIEW SMW_TRIPLES_NS AS
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    CAST(sdb.o_blob AS CHAR) AS object,
    'blob' AS TYPE
  FROM 
    smw_di_blob sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id 
  WHERE 
    sdb.o_blob IS NOT NULL
UNION
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    CAST(sdb.o_hash AS CHAR) AS object,
    'shortblob' AS TYPE
  FROM 
    smw_di_blob sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id 
  WHERE 
    sdb.o_blob IS NULL
UNION
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    IF (sdb.o_value,'true','false') AS object,
    'bool' AS TYPE
  FROM 
    smw_di_bool sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id  
UNION
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    sdb.o_sortkey AS object,
    'number' AS TYPE
  FROM 
    smw_di_number sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION 
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    CONCAT(
       split_str(CAST(sdb.o_serialized AS CHAR),'/',2),'-',
       split_str(CAST(sdb.o_serialized AS CHAR),'/',3),'-',
       split_str(CAST(sdb.o_serialized AS CHAR),'/',4)
       ) AS object,
    'time' AS TYPE
  FROM 
    smw_di_time sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    CAST(sdb.o_serialized AS CHAR) AS object,
    'uri' AS TYPE
  FROM 
    smw_di_uri sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
UNION
  SELECT 
    CONCAT(namespace(sois.smw_namespace),CAST(sois.smw_title AS CHAR),CAST(sois.smw_subobject AS CHAR)) AS subject,
    CONCAT(namespace(soip.smw_namespace),CAST(soip.smw_title AS CHAR),CAST(soip.smw_subobject AS CHAR)) AS predicate,
    CONCAT(namespace(soio.smw_namespace),CAST(soio.smw_title AS CHAR),CAST(soio.smw_subobject AS CHAR)) AS object,
    'page' AS TYPE
  FROM 
    smw_di_wikipage sdb
    JOIN smw_object_ids sois ON sois.smw_id=sdb.s_id
    JOIN smw_object_ids soip ON soip.smw_id=sdb.p_id
    JOIN smw_object_ids soio ON soio.smw_id=sdb.o_id;