bench_executor.sdmrdfizer
SDM-RDFizer is an efficient RML-Compliant engine for knowledge graph construction.
Repository: https://github.com/SDM-TIB/SDM-RDFizer
#!/usr/bin/env python3

"""
SDM-RDFizer is an efficient RML-Compliant engine for knowledge graph
construction.

**Repository**: https://github.com/SDM-TIB/SDM-RDFizer
"""

import os
import configparser
from rdflib import Graph, BNode, Namespace, Literal, RDF
from timeout_decorator import timeout, TimeoutError  # type: ignore
from typing import Optional
from bench_executor.container import Container
from bench_executor.logger import Logger

VERSION = '4.6.6.5'
TIMEOUT = 6 * 3600  # 6 hours
R2RML = Namespace('http://www.w3.org/ns/r2rml#')
RML = Namespace('http://semweb.mmlab.be/ns/rml#')
D2RQ = Namespace('http://www.wiwiss.fu-berlin.de/suhl/bizer/D2RQ/0.1#')
IMAGE = f'blindreviewing/sdm-rdfizer:v{VERSION}'


class SDMRDFizer(Container):
    """SDMRDFizer container for executing RML mappings."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the SDMRDFizer class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        # Clear the umask so the directory is created with the default
        # (permissive) mode requested by os.makedirs.
        os.umask(0)
        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
        # <data_path>/sdmrdfizer is mounted as /data and <data_path>/shared
        # as /data/shared inside the container.
        super().__init__(IMAGE, 'SDM-RDFizer', self._logger,
                         volumes=[f'{self._data_path}/sdmrdfizer:/data',
                                  f'{self._data_path}/shared:/data/shared'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for SDM-RDFizer.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for SDM-RDFizer.

        """
        return __name__.lower()

    @timeout(TIMEOUT)
    def _execute_with_timeout(self, arguments) -> bool:
        """Execute a mapping with a provided timeout.

        The command line is fixed: SDM-RDFizer is always started with
        ``/data/config_sdmrdfizer.ini``. ``arguments`` is accepted but not
        used by this method.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        cmd = 'python3 sdm-rdfizer/rdfizer/run_rdfizer.py ' + \
              '/data/config_sdmrdfizer.ini'
        return self.run_and_wait_for_exit(cmd)

    def execute(self, arguments: list) -> bool:
        """Execute SDM-RDFizer with given arguments.

        Parameters
        ----------
        arguments : list
            Arguments to supply to SDM-RDFizer.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not. A timeout is logged and
            reported as a failure.
        """
        try:
            return self._execute_with_timeout(arguments)
        except TimeoutError:
            msg = f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer'
            self._logger.error(msg)

        return False

    def execute_mapping(self,
                        mapping_file: str,
                        output_file: str,
                        serialization: str,
                        rdb_username: Optional[str] = None,
                        rdb_password: Optional[str] = None,
                        rdb_host: Optional[str] = None,
                        rdb_port: Optional[int] = None,
                        rdb_name: Optional[str] = None,
                        rdb_type: Optional[str] = None) -> bool:
        """Execute a mapping file with SDM-RDFizer.

        N-Triples and Turtle are currently supported as serialization
        format for SDM-RDFizer.

        The database parameters are only used when all six of them are
        provided. In that case the mapping is rewritten from R2RML to RML
        before execution.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in.
        serialization : str
            Serialization format to use: 'ntriples' or 'turtle'.
        rdb_username : Optional[str]
            Username for the database, required when a database is used as
            source.
        rdb_password : Optional[str]
            Password for the database, required when a database is used as
            source.
        rdb_host : Optional[str]
            Hostname for the database, required when a database is used as
            source.
        rdb_port : Optional[int]
            Port for the database, required when a database is used as source.
        rdb_name : Optional[str]
            Database name for the database, required when a database is used as
            source.
        rdb_type : Optional[str]
            Database type ('MySQL' or 'PostgreSQL'), required when a database
            is used as source.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        NotImplementedError
            If the serialization format or the database type is not
            supported.
        """

        # Configuration file
        name = os.path.splitext(os.path.basename(output_file))[0]
        config = configparser.ConfigParser(delimiters=':')
        config['default'] = {
            'main_directory': '/data/shared'
        }
        config['datasets'] = {
            'number_of_datasets': str(1),
            'output_folder': '/data/shared',
            'all_in_one_file': 'yes',
            'remove_duplicate': 'yes',
            'enrichment': 'yes',
            'name': name,
            'ordered': 'no',
            'large_file': 'false'
        }
        config['dataset1'] = {
            'name': name,
            'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
        }

        if serialization == 'ntriples':
            config['datasets']['output_format'] = 'n-triples'
        elif serialization == 'turtle':
            config['datasets']['output_format'] = 'turtle'
        else:
            raise NotImplementedError('SDM-RDFizer does not support '
                                      f'"{serialization}" output format')

        # Make sure the engine directory exists before the converted mapping
        # and the configuration file are written into it.
        os.umask(0)
        engine_dir = os.path.join(self._data_path, 'sdmrdfizer')
        os.makedirs(engine_dir, exist_ok=True)

        if rdb_username is not None and rdb_password is not None \
                and rdb_host is not None and rdb_port is not None \
                and rdb_name is not None and rdb_type is not None:
            config['dataset1']['user'] = rdb_username
            config['dataset1']['password'] = rdb_password
            config['dataset1']['host'] = rdb_host
            config['dataset1']['port'] = str(rdb_port)
            config['dataset1']['db'] = rdb_name
            config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
            if rdb_type == 'MySQL':
                config['datasets']['dbType'] = 'mysql'
                driver = 'jdbc:mysql'
            elif rdb_type == 'PostgreSQL':
                config['datasets']['dbType'] = 'postgres'
                driver = 'jdbc:postgresql'
            else:
                raise NotImplementedError('SDM-RDFizer does not support RDB '
                                          f'"{rdb_type}"')
            dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'

            # Compatibility with R2RML mapping files
            # Replace rr:logicalTable with rml:logicalSource + D2RQ description
            # and rr:column with rml:reference
            g = Graph()
            g.bind('rr', R2RML)
            g.bind('rml', RML)
            g.bind('d2rq', D2RQ)
            g.bind('rdf', RDF)
            g.parse(os.path.join(self._data_path, 'shared',
                                 os.path.basename(mapping_file)))

            # rr:logicalTable --> rml:logicalSource
            # The matches are materialized first because the loop body adds
            # and removes triples of the graph being iterated.
            # NOTE(review): only rr:tableName is carried over; a logical table
            # defined through rr:sqlQuery is not converted -- confirm whether
            # such mappings must be supported.
            triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
            for triples_map_iri, _, _ in triples_maps:
                logical_source_iri = BNode()
                d2rq_rdb_iri = BNode()
                logical_table_iri = g.value(triples_map_iri,
                                            R2RML.logicalTable)
                table_name_literal = g.value(logical_table_iri,
                                             R2RML.tableName)
                g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
                g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
                g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
                g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
                g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
                g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
                g.add((logical_source_iri, R2RML.tableName,
                       table_name_literal))
                g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
                g.add((logical_source_iri, RDF.type, RML.LogicalSource))
                g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
                g.remove((triples_map_iri, R2RML.logicalTable,
                          logical_table_iri))
                g.remove((logical_table_iri, R2RML.tableName,
                          table_name_literal))
                g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
                g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))

            # rr:column --> rml:reference
            for s, p, o in list(g.triples((None, R2RML.column, None))):
                g.add((s, RML.reference, o))
                g.remove((s, p, o))

            # SDM-RDFizer cannot handle rml:referenceFormulation when using
            # RDBs, remove it for safety
            # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
            for triple in list(g.triples((None, RML.referenceFormulation,
                                          None))):
                g.remove(triple)

            destination = os.path.join(engine_dir,
                                       'mapping_converted.rml.ttl')
            g.serialize(destination=destination, format='turtle')

        path = os.path.join(engine_dir, 'config_sdmrdfizer.ini')
        with open(path, 'w') as f:
            config.write(f, space_around_delimiters=False)

        return self.execute([])
class SDMRDFizer(Container):
    """SDMRDFizer container for executing RML mappings."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the SDMRDFizer class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        # Clear the umask so the directory is created with the default
        # (permissive) mode requested by os.makedirs.
        os.umask(0)
        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
        # <data_path>/sdmrdfizer is mounted as /data and <data_path>/shared
        # as /data/shared inside the container.
        super().__init__(IMAGE, 'SDM-RDFizer', self._logger,
                         volumes=[f'{self._data_path}/sdmrdfizer:/data',
                                  f'{self._data_path}/shared:/data/shared'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for SDM-RDFizer.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for SDM-RDFizer.

        """
        return __name__.lower()

    @timeout(TIMEOUT)
    def _execute_with_timeout(self, arguments) -> bool:
        """Execute a mapping with a provided timeout.

        The command line is fixed: SDM-RDFizer is always started with
        ``/data/config_sdmrdfizer.ini``. ``arguments`` is accepted but not
        used by this method.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        cmd = 'python3 sdm-rdfizer/rdfizer/run_rdfizer.py ' + \
              '/data/config_sdmrdfizer.ini'
        return self.run_and_wait_for_exit(cmd)

    def execute(self, arguments: list) -> bool:
        """Execute SDM-RDFizer with given arguments.

        Parameters
        ----------
        arguments : list
            Arguments to supply to SDM-RDFizer.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not. A timeout is logged and
            reported as a failure.
        """
        try:
            return self._execute_with_timeout(arguments)
        except TimeoutError:
            msg = f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer'
            self._logger.error(msg)

        return False

    def execute_mapping(self,
                        mapping_file: str,
                        output_file: str,
                        serialization: str,
                        rdb_username: Optional[str] = None,
                        rdb_password: Optional[str] = None,
                        rdb_host: Optional[str] = None,
                        rdb_port: Optional[int] = None,
                        rdb_name: Optional[str] = None,
                        rdb_type: Optional[str] = None) -> bool:
        """Execute a mapping file with SDM-RDFizer.

        N-Triples and Turtle are currently supported as serialization
        format for SDM-RDFizer.

        The database parameters are only used when all six of them are
        provided. In that case the mapping is rewritten from R2RML to RML
        before execution.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in.
        serialization : str
            Serialization format to use: 'ntriples' or 'turtle'.
        rdb_username : Optional[str]
            Username for the database, required when a database is used as
            source.
        rdb_password : Optional[str]
            Password for the database, required when a database is used as
            source.
        rdb_host : Optional[str]
            Hostname for the database, required when a database is used as
            source.
        rdb_port : Optional[int]
            Port for the database, required when a database is used as source.
        rdb_name : Optional[str]
            Database name for the database, required when a database is used as
            source.
        rdb_type : Optional[str]
            Database type ('MySQL' or 'PostgreSQL'), required when a database
            is used as source.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        NotImplementedError
            If the serialization format or the database type is not
            supported.
        """

        # Configuration file
        name = os.path.splitext(os.path.basename(output_file))[0]
        config = configparser.ConfigParser(delimiters=':')
        config['default'] = {
            'main_directory': '/data/shared'
        }
        config['datasets'] = {
            'number_of_datasets': str(1),
            'output_folder': '/data/shared',
            'all_in_one_file': 'yes',
            'remove_duplicate': 'yes',
            'enrichment': 'yes',
            'name': name,
            'ordered': 'no',
            'large_file': 'false'
        }
        config['dataset1'] = {
            'name': name,
            'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
        }

        if serialization == 'ntriples':
            config['datasets']['output_format'] = 'n-triples'
        elif serialization == 'turtle':
            config['datasets']['output_format'] = 'turtle'
        else:
            raise NotImplementedError('SDM-RDFizer does not support '
                                      f'"{serialization}" output format')

        # Make sure the engine directory exists before the converted mapping
        # and the configuration file are written into it.
        os.umask(0)
        engine_dir = os.path.join(self._data_path, 'sdmrdfizer')
        os.makedirs(engine_dir, exist_ok=True)

        if rdb_username is not None and rdb_password is not None \
                and rdb_host is not None and rdb_port is not None \
                and rdb_name is not None and rdb_type is not None:
            config['dataset1']['user'] = rdb_username
            config['dataset1']['password'] = rdb_password
            config['dataset1']['host'] = rdb_host
            config['dataset1']['port'] = str(rdb_port)
            config['dataset1']['db'] = rdb_name
            config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
            if rdb_type == 'MySQL':
                config['datasets']['dbType'] = 'mysql'
                driver = 'jdbc:mysql'
            elif rdb_type == 'PostgreSQL':
                config['datasets']['dbType'] = 'postgres'
                driver = 'jdbc:postgresql'
            else:
                raise NotImplementedError('SDM-RDFizer does not support RDB '
                                          f'"{rdb_type}"')
            dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'

            # Compatibility with R2RML mapping files
            # Replace rr:logicalTable with rml:logicalSource + D2RQ description
            # and rr:column with rml:reference
            g = Graph()
            g.bind('rr', R2RML)
            g.bind('rml', RML)
            g.bind('d2rq', D2RQ)
            g.bind('rdf', RDF)
            g.parse(os.path.join(self._data_path, 'shared',
                                 os.path.basename(mapping_file)))

            # rr:logicalTable --> rml:logicalSource
            # The matches are materialized first because the loop body adds
            # and removes triples of the graph being iterated.
            # NOTE(review): only rr:tableName is carried over; a logical table
            # defined through rr:sqlQuery is not converted -- confirm whether
            # such mappings must be supported.
            triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
            for triples_map_iri, _, _ in triples_maps:
                logical_source_iri = BNode()
                d2rq_rdb_iri = BNode()
                logical_table_iri = g.value(triples_map_iri,
                                            R2RML.logicalTable)
                table_name_literal = g.value(logical_table_iri,
                                             R2RML.tableName)
                g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
                g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
                g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
                g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
                g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
                g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
                g.add((logical_source_iri, R2RML.tableName,
                       table_name_literal))
                g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
                g.add((logical_source_iri, RDF.type, RML.LogicalSource))
                g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
                g.remove((triples_map_iri, R2RML.logicalTable,
                          logical_table_iri))
                g.remove((logical_table_iri, R2RML.tableName,
                          table_name_literal))
                g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
                g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))

            # rr:column --> rml:reference
            for s, p, o in list(g.triples((None, R2RML.column, None))):
                g.add((s, RML.reference, o))
                g.remove((s, p, o))

            # SDM-RDFizer cannot handle rml:referenceFormulation when using
            # RDBs, remove it for safety
            # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
            for triple in list(g.triples((None, RML.referenceFormulation,
                                          None))):
                g.remove(triple)

            destination = os.path.join(engine_dir,
                                       'mapping_converted.rml.ttl')
            g.serialize(destination=destination, format='turtle')

        path = os.path.join(engine_dir, 'config_sdmrdfizer.ini')
        with open(path, 'w') as f:
            config.write(f, space_around_delimiters=False)

        return self.execute([])
SDMRDFizer container for executing RML mappings.
SDMRDFizer(data_path: str, config_path: str, directory: str, verbose: bool)
def __init__(self, data_path: str, config_path: str, directory: str,
             verbose: bool):
    """Set up the SDM-RDFizer container for a case.

    Parameters
    ----------
    data_path : str
        Path to the data directory of the case.
    config_path : str
        Path to the config directory of the case.
    directory : str
        Path to the directory to store logs.
    verbose : bool
        Enable verbose logs.
    """
    self._data_path = os.path.abspath(data_path)
    self._config_path = os.path.abspath(config_path)
    self._logger = Logger(__name__, directory, verbose)

    # Create the engine-specific directory with an empty umask.
    os.umask(0)
    engine_dir = os.path.join(self._data_path, 'sdmrdfizer')
    os.makedirs(engine_dir, exist_ok=True)

    # Host directories exposed to the container.
    mounts = [
        f'{self._data_path}/sdmrdfizer:/data',
        f'{self._data_path}/shared:/data/shared',
    ]
    super().__init__(IMAGE, 'SDM-RDFizer', self._logger, volumes=mounts)
Creates an instance of the SDMRDFizer class.
Parameters
- data_path (str): Path to the data directory of the case.
- config_path (str): Path to the config directory of the case.
- directory (str): Path to the directory to store logs.
- verbose (bool): Enable verbose logs.
root_mount_directory: str
Subdirectory in the root directory of the case for SDM-RDFizer.
Returns
- subdirectory (str): Subdirectory of the root directory for SDM-RDFizer.
def
execute(self, arguments: list) -> bool:
def execute(self, arguments: list) -> bool:
    """Run SDM-RDFizer and report whether it finished successfully.

    Parameters
    ----------
    arguments : list
        Arguments to supply to SDM-RDFizer.

    Returns
    -------
    success : bool
        Result of the timed execution, or False when the timeout was
        reached (the timeout is logged as an error).
    """
    try:
        succeeded = self._execute_with_timeout(arguments)
    except TimeoutError:
        self._logger.error(f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer')
        succeeded = False
    return succeeded
Execute SDM-RDFizer with given arguments.
Parameters
- arguments (list): Arguments to supply to SDM-RDFizer.
Returns
- success (bool): Whether the execution succeeded or not.
def
execute_mapping( self, mapping_file: str, output_file: str, serialization: str, rdb_username: Optional[str] = None, rdb_password: Optional[str] = None, rdb_host: Optional[str] = None, rdb_port: Optional[int] = None, rdb_name: Optional[str] = None, rdb_type: Optional[str] = None) -> bool:
def execute_mapping(self,
                    mapping_file: str,
                    output_file: str,
                    serialization: str,
                    rdb_username: Optional[str] = None,
                    rdb_password: Optional[str] = None,
                    rdb_host: Optional[str] = None,
                    rdb_port: Optional[int] = None,
                    rdb_name: Optional[str] = None,
                    rdb_type: Optional[str] = None) -> bool:
    """Execute a mapping file with SDM-RDFizer.

    N-Triples and Turtle are currently supported as serialization
    format for SDM-RDFizer.

    The database parameters are only used when all six of them are
    provided. In that case the mapping is rewritten from R2RML to RML
    before execution.

    Parameters
    ----------
    mapping_file : str
        Path to the mapping file to execute.
    output_file : str
        Name of the output file to store the triples in.
    serialization : str
        Serialization format to use: 'ntriples' or 'turtle'.
    rdb_username : Optional[str]
        Username for the database, required when a database is used as
        source.
    rdb_password : Optional[str]
        Password for the database, required when a database is used as
        source.
    rdb_host : Optional[str]
        Hostname for the database, required when a database is used as
        source.
    rdb_port : Optional[int]
        Port for the database, required when a database is used as source.
    rdb_name : Optional[str]
        Database name for the database, required when a database is used as
        source.
    rdb_type : Optional[str]
        Database type ('MySQL' or 'PostgreSQL'), required when a database
        is used as source.

    Returns
    -------
    success : bool
        Whether the execution was successful or not.

    Raises
    ------
    NotImplementedError
        If the serialization format or the database type is not
        supported.
    """

    # Configuration file
    name = os.path.splitext(os.path.basename(output_file))[0]
    config = configparser.ConfigParser(delimiters=':')
    config['default'] = {
        'main_directory': '/data/shared'
    }
    config['datasets'] = {
        'number_of_datasets': str(1),
        'output_folder': '/data/shared',
        'all_in_one_file': 'yes',
        'remove_duplicate': 'yes',
        'enrichment': 'yes',
        'name': name,
        'ordered': 'no',
        'large_file': 'false'
    }
    config['dataset1'] = {
        'name': name,
        'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
    }

    if serialization == 'ntriples':
        config['datasets']['output_format'] = 'n-triples'
    elif serialization == 'turtle':
        config['datasets']['output_format'] = 'turtle'
    else:
        raise NotImplementedError('SDM-RDFizer does not support '
                                  f'"{serialization}" output format')

    # Make sure the engine directory exists before the converted mapping
    # and the configuration file are written into it.
    os.umask(0)
    engine_dir = os.path.join(self._data_path, 'sdmrdfizer')
    os.makedirs(engine_dir, exist_ok=True)

    if rdb_username is not None and rdb_password is not None \
            and rdb_host is not None and rdb_port is not None \
            and rdb_name is not None and rdb_type is not None:
        config['dataset1']['user'] = rdb_username
        config['dataset1']['password'] = rdb_password
        config['dataset1']['host'] = rdb_host
        config['dataset1']['port'] = str(rdb_port)
        config['dataset1']['db'] = rdb_name
        config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
        if rdb_type == 'MySQL':
            config['datasets']['dbType'] = 'mysql'
            driver = 'jdbc:mysql'
        elif rdb_type == 'PostgreSQL':
            config['datasets']['dbType'] = 'postgres'
            driver = 'jdbc:postgresql'
        else:
            raise NotImplementedError('SDM-RDFizer does not support RDB '
                                      f'"{rdb_type}"')
        dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'

        # Compatibility with R2RML mapping files
        # Replace rr:logicalTable with rml:logicalSource + D2RQ description
        # and rr:column with rml:reference
        g = Graph()
        g.bind('rr', R2RML)
        g.bind('rml', RML)
        g.bind('d2rq', D2RQ)
        g.bind('rdf', RDF)
        g.parse(os.path.join(self._data_path, 'shared',
                             os.path.basename(mapping_file)))

        # rr:logicalTable --> rml:logicalSource
        # The matches are materialized first because the loop body adds
        # and removes triples of the graph being iterated.
        # NOTE(review): only rr:tableName is carried over; a logical table
        # defined through rr:sqlQuery is not converted -- confirm whether
        # such mappings must be supported.
        triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
        for triples_map_iri, _, _ in triples_maps:
            logical_source_iri = BNode()
            d2rq_rdb_iri = BNode()
            logical_table_iri = g.value(triples_map_iri,
                                        R2RML.logicalTable)
            table_name_literal = g.value(logical_table_iri,
                                         R2RML.tableName)
            g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
            g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
            g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
            g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
            g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
            g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
            g.add((logical_source_iri, R2RML.tableName,
                   table_name_literal))
            g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
            g.add((logical_source_iri, RDF.type, RML.LogicalSource))
            g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
            g.remove((triples_map_iri, R2RML.logicalTable,
                      logical_table_iri))
            g.remove((logical_table_iri, R2RML.tableName,
                      table_name_literal))
            g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
            g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))

        # rr:column --> rml:reference
        for s, p, o in list(g.triples((None, R2RML.column, None))):
            g.add((s, RML.reference, o))
            g.remove((s, p, o))

        # SDM-RDFizer cannot handle rml:referenceFormulation when using
        # RDBs, remove it for safety
        # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
        for triple in list(g.triples((None, RML.referenceFormulation,
                                      None))):
            g.remove(triple)

        destination = os.path.join(engine_dir,
                                   'mapping_converted.rml.ttl')
        g.serialize(destination=destination, format='turtle')

    path = os.path.join(engine_dir, 'config_sdmrdfizer.ini')
    with open(path, 'w') as f:
        config.write(f, space_around_delimiters=False)

    return self.execute([])
Execute a mapping file with SDM-RDFizer.
N-Triples and Turtle are currently supported as serialization formats for SDM-RDFizer.
Parameters
- mapping_file (str): Path to the mapping file to execute.
- output_file (str): Name of the output file to store the triples in.
- serialization (str): Serialization format to use.
- rdb_username (Optional[str]): Username for the database, required when a database is used as source.
- rdb_password (Optional[str]): Password for the database, required when a database is used as source.
- rdb_host (Optional[str]): Hostname for the database, required when a database is used as source.
- rdb_port (Optional[int]): Port for the database, required when a database is used as source.
- rdb_name (Optional[str]): Database name for the database, required when a database is used as source.
- rdb_type (Optional[str]): Database type, required when a database is used as source.
Returns
- success (bool): Whether the execution was successful or not.