bench_executor.ontop
Ontop is a Virtual Knowledge Graph system. It exposes the content of arbitrary relational databases as knowledge graphs. These graphs are virtual, which means that data remains in the data sources instead of being moved to another database.
Website: https://ontop-vkg.org
Repository: https://github.com/ontop/ontop
#!/usr/bin/env python3

"""
Ontop is a Virtual Knowledge Graph system. It exposes the content of
arbitrary relational databases as knowledge graphs. These graphs are virtual,
which means that data remains in the data sources instead of being moved
to another database.

**Website**: https://ontop-vkg.org<br>
**Repository**: https://github.com/ontop/ontop
"""

import os
import psutil
import configparser
from rdflib import Graph, Namespace, RDF, URIRef
from timeout_decorator import timeout, TimeoutError  # type: ignore
from typing import Dict, Optional
from bench_executor.container import Container
from bench_executor.logger import Logger

VERSION = '5.0.0'
TIMEOUT = 6 * 3600  # 6 hours
R2RML = Namespace('http://www.w3.org/ns/r2rml#')


class Ontop(Container):
    """Ontop container super class for OntopMaterialize and OntopVirtualize."""

    def __init__(self, name: str, data_path: str, logger: Logger, mode: str):
        """Creates an instance of the Ontop class.

        Parameters
        ----------
        name : str
            Pretty name of the container.
        data_path: str
            Path to the data directory of the case.
        logger : Logger
            Logger to use for log messages.
        mode : str
            Ontop mode: `materialize` or `endpoint`

        Raises
        ------
        ValueError
            If `mode` is neither `materialize` nor `endpoint`.
        """
        self._mode = mode
        self._headers: Dict[str, Dict[str, str]] = {}
        self._logger = logger
        self._data_path = data_path

        # Resolving the mount directory also validates the mode: the
        # property raises ValueError for anything unknown.
        mount_directory = self.root_mount_directory
        os.makedirs(os.path.join(self._data_path, mount_directory),
                    exist_ok=True)

        # Set Java heap to 1/2 of the total memory instead of the default 1/4
        max_heap = psutil.virtual_memory().total // 2

        # Configure logging
        log_level = 'debug' if self._logger.verbose else 'info'
        self._logger.info(f'Initialized Ontop logger at "{log_level}" level')

        environment = {'ONTOP_JAVA_ARGS': f'-Xmx{max_heap} -Xms{max_heap}',
                       'ONTOP_LOG_LEVEL': log_level}
        super().__init__(f'blindreviewing/ontop:v{VERSION}', name,
                         self._logger,
                         ports={'8888': '8888'},
                         environment=environment,
                         volumes=[f'{self._data_path}/'
                                  f'{mount_directory}:/data',
                                  f'{self._data_path}/shared:/data/shared'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Ontop.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Ontop.

        Raises
        ------
        ValueError
            If the Ontop mode is neither `endpoint` nor `materialize`.
        """
        if self._mode == 'endpoint':
            return 'ontopvirtualize'
        elif self._mode == 'materialize':
            return 'ontopmaterialize'
        else:
            raise ValueError(f'Unknown Ontop mode: "{self._mode}"')

    @property
    def endpoint(self) -> str:
        """SPARQL endpoint URL for Ontop.

        Returns
        -------
        url : str
            SPARQL endpoint URL.
        """
        return 'http://localhost:8888/sparql'

    @property
    def headers(self) -> dict:
        """HTTP headers of SPARQL queries for serialization formats.

        Only supported serialization formats are included in the dictionary.
        Currently, the following formats are supported:
        - N-Triples
        - N-Quads
        - Turtle
        - CSV
        - RDF/JSON
        - RDF/XML
        - JSON-LD

        Returns
        -------
        headers : dict
            Dictionary of headers to use for each serialization format.
        """
        return self._headers

    def _execute(self, arguments: list) -> bool:
        """Execute Ontop with given arguments.

        In `endpoint` mode the call returns once the endpoint start-up line
        shows up in the container log; in `materialize` mode it waits for
        the command to exit.

        Parameters
        ----------
        arguments : list
            Arguments to supply to Ontop.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not.
        """
        cmd = f'/ontop/ontop {self._mode} {" ".join(arguments)}'
        self._logger.info(f'Executing Ontop with command: {cmd}')
        if self._mode == 'endpoint':
            log_line = 'OntopEndpointApplication - Started ' + \
                       'OntopEndpointApplication'
            success = self.run_and_wait_for_log(log_line, cmd)
        elif self._mode == 'materialize':
            success = self.run_and_wait_for_exit(cmd)
        else:
            self._logger.error(f'Unknown Ontop mode "{self._mode}"')
            success = False

        return success

    def _write_properties(self,
                          rdb_username: str,
                          rdb_password: str,
                          rdb_host: str,
                          rdb_port: int,
                          rdb_name: str,
                          rdb_type: str) -> None:
        """Write the JDBC `config.properties` file into the mount directory.

        Raises
        ------
        ValueError
            If `rdb_type` is neither `MySQL` nor `PostgreSQL`.
        """
        if rdb_type == 'MySQL':
            scheme, driver = 'mysql', 'com.mysql.cj.jdbc.Driver'
        elif rdb_type == 'PostgreSQL':
            scheme, driver = 'postgresql', 'org.postgresql.Driver'
        else:
            msg = f'Unknown RDB type: "{rdb_type}"'
            self._logger.error(msg)
            raise ValueError(msg)

        # Interpolation is disabled on purpose: with the default
        # BasicInterpolation a '%' in the credentials raises ValueError.
        config = configparser.ConfigParser(interpolation=None)
        config['root'] = {
            'jdbc.url': f'jdbc:{scheme}://{rdb_host}:{rdb_port}/{rdb_name}',
            'jdbc.driver': driver,
            'jdbc.user': rdb_username,
            'jdbc.password': rdb_password,
        }

        path = os.path.join(self._data_path, self.root_mount_directory)
        os.makedirs(path, exist_ok=True)
        properties_path = os.path.join(path, 'config.properties')
        with open(properties_path, 'w') as f:
            config.write(f, space_around_delimiters=False)

        # .properties files are like .ini files but without a [HEADER]
        # Use a [root] header and remove it after writing
        with open(properties_path, 'r') as f:
            data = f.read()

        with open(properties_path, 'w') as f:
            f.write(data.replace('[root]\n', ''))

    def _convert_mapping(self, mapping_file: str) -> bool:
        """Rewrite constant rdf:type PredicateObjectMaps into rr:class.

        The mapping is read from the `shared` directory and the converted
        mapping is stored as `mapping_converted.r2rml.ttl` in the mount
        directory.

        Returns
        -------
        success : bool
            False if an rdf:type PredicateObjectMap cannot be converted.
        """
        # Compatibility with Ontop requiring rr:class
        # Replace any rdf:type construction with rr:class
        # Without this, a strange error is raised: 'The definition of the
        # predicate is not always a ground term triple(s,p,o)'
        g = Graph()
        g.bind('r2rml', R2RML)
        g.bind('rdf', RDF)
        g.parse(os.path.join(self._data_path, 'shared',
                             os.path.basename(mapping_file)))

        # The graph is modified inside the loops, so iterate over snapshots
        # instead of live generators.
        triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
        for triples_map_iri, _, _ in triples_maps:
            subject_map_iri = g.value(triples_map_iri, R2RML.subjectMap)

            poms = list(g.triples((triples_map_iri,
                                   R2RML.predicateObjectMap,
                                   None)))
            for _, _, predicate_object_map_iri in poms:
                predicate_map_iri = g.value(predicate_object_map_iri,
                                            R2RML.predicateMap)
                object_map_iri = g.value(predicate_object_map_iri,
                                         R2RML.objectMap)

                if predicate_map_iri is None or object_map_iri is None:
                    continue

                # Check if PredicateObjectMap is pointing to a PredicateMap
                # specifying rdf:type. Skip this PredicateObjectMap if not
                if g.value(predicate_map_iri, R2RML.constant) != RDF.type:
                    continue

                # Retrieve the ObjectMap rr:constant value and add it as
                # rr:class to the Subject Map
                rdf_type_value = g.value(object_map_iri, R2RML.constant)
                if rdf_type_value is None:
                    msg = 'Cannot extract rr:class value, rdf:type value ' + \
                          'is not a constant value!'
                    self._logger.error(msg)
                    return False

                # Without a Subject Map there is no node to attach rr:class
                # to; adding a triple with a None subject would fail.
                if subject_map_iri is None:
                    msg = 'Cannot add rr:class value, Triples Map has ' + \
                          'no rr:subjectMap!'
                    self._logger.error(msg)
                    return False

                iri = URIRef(rdf_type_value.toPython())
                g.add((subject_map_iri, R2RML['class'], iri))

                # Remove all triples associated with the rdf:type
                # PredicateMap, ObjectMap and PredicateObjectMap
                g.remove((predicate_map_iri, None, None))
                g.remove((object_map_iri, None, None))
                g.remove((predicate_object_map_iri, None, None))

                # Remove PredicateObjectMap from Triples Map
                g.remove((triples_map_iri, R2RML.predicateObjectMap,
                          predicate_object_map_iri))

        destination = os.path.join(self._data_path,
                                   self.root_mount_directory,
                                   'mapping_converted.r2rml.ttl')
        g.serialize(destination=destination, format='turtle')
        return True

    def _execute_mapping(self,
                         config_file: str,
                         arguments: list,
                         mapping_file: str,
                         output_file: Optional[str],
                         rdb_username: str,
                         rdb_password: str,
                         rdb_host: str,
                         rdb_port: int,
                         rdb_name: str,
                         rdb_type: str) -> bool:
        """Execute a mapping file with Ontop.

        Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.

        Parameters
        ----------
        config_file : str
            Name of the generated config file for Ontop. Currently unused:
            the configuration is always written to `config.properties` in
            the mount directory.
        arguments : list
            List of arguments to pass to Ontop. The list is not modified.
        mapping_file : str
            Name of the mapping file to use.
        output_file : Optional[str]
            Name of the output file to use. Only applicable for
            materialization.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        ValueError
            If `rdb_type` is neither `MySQL` nor `PostgreSQL`.
        """
        # Generate configuration file since no CLI is available
        self._write_properties(rdb_username, rdb_password, rdb_host,
                               rdb_port, rdb_name, rdb_type)

        if not self._convert_mapping(mapping_file):
            return False

        # Work on a copy so the caller's list is left untouched
        arguments = [*arguments, '-m', '/data/mapping_converted.r2rml.ttl']
        if output_file is not None:
            arguments += ['-o', os.path.join('/data/shared/', output_file)]
        arguments += ['-p', '/data/config.properties']

        return self._execute(arguments)


class OntopVirtualize(Ontop):
    """OntopVirtualize container for setting up an Ontop SPARQL endpoint."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the OntopVirtualize class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)
        super().__init__('Ontop-Virtualize', self._data_path, self._logger,
                         'endpoint')

    def execute_mapping(self,
                        mapping_file: str,
                        serialization: str,
                        rdb_username: str,
                        rdb_password: str,
                        rdb_host: str,
                        rdb_port: int,
                        rdb_name: str,
                        rdb_type: str) -> bool:
        """Start an Ontop SPARQL endpoint with a mapping.

        Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.
        The following serialization keys are accepted:
        - `ntriples`: N-Triples (Ontop v5+)
        - `nquads`: N-Quads (Ontop v5+)
        - `turtle`: Turtle
        - `rdfjson`: RDF/JSON
        - `rdfxml`: RDF/XML
        - `jsonld`: JSON-LD
        - `csv`: CSV

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        serialization : str
            Serialization format to use.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        ValueError
            If `serialization` is not one of the accepted keys.
        """
        config_file = f'{self._data_path}/{self.root_mount_directory}' + \
                      '/config.properties'
        arguments = ['--cors-allowed-origins=*', '--port=8888']
        self._headers.update({
            'ntriples': {'Accept': 'application/n-triples'},
            'nquads': {'Accept': 'application/n-quads'},
            'turtle': {'Accept': 'text/turtle'},
            'rdfjson': {'Accept': 'application/rdf+json'},
            'rdfxml': {'Accept': 'application/rdf+xml'},
            'jsonld': {'Accept': 'application/ld+json'},
            'csv': {'Accept': 'text/csv'},
        })
        if serialization not in self._headers:
            msg = 'Unsupported serialization format ' + \
                  f'"{serialization}" for Ontop'
            self._logger.error(msg)
            raise ValueError(msg)
        return super()._execute_mapping(config_file, arguments,
                                        mapping_file, None, rdb_username,
                                        rdb_password, rdb_host, rdb_port,
                                        rdb_name, rdb_type)


class OntopMaterialize(Ontop):
    """OntopMaterialize container to execute a R2RML mapping."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the OntopMaterialize class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)
        # The `ontopmaterialize` directory is created by Ontop.__init__
        super().__init__('Ontop-Materialize', self._data_path, self._logger,
                         'materialize')

    @timeout(TIMEOUT)
    def _execute_mapping_with_timeout(self, mapping_file: str,
                                      output_file: str,
                                      serialization: str,
                                      rdb_username: str,
                                      rdb_password: str,
                                      rdb_host: str,
                                      rdb_port: int,
                                      rdb_name: str,
                                      rdb_type: str) -> bool:
        """Execute a mapping with a provided timeout.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        config_file = f'{self._data_path}/{self.root_mount_directory}' + \
                      '/config.properties'
        arguments = ['-f', serialization]
        # Materialization does not serve SPARQL queries: no HTTP headers
        self._headers = {}
        return super()._execute_mapping(config_file, arguments,
                                        mapping_file, output_file,
                                        rdb_username, rdb_password,
                                        rdb_host, rdb_port, rdb_name, rdb_type)

    def execute_mapping(self,
                        mapping_file: str,
                        output_file: str,
                        serialization: str,
                        rdb_username: str,
                        rdb_password: str,
                        rdb_host: str,
                        rdb_port: int,
                        rdb_name: str,
                        rdb_type: str) -> bool:
        """Execute a R2RML mapping with Ontop

        N-Quads and N-Triples are currently supported as serialization
        for Ontop materialize. Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in. This is not used
            for OntopVirtualize.
        serialization : str
            Serialization format to use.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not. A timeout is
            reported as a failure.
        """
        try:
            return self._execute_mapping_with_timeout(mapping_file,
                                                      output_file,
                                                      serialization,
                                                      rdb_username,
                                                      rdb_password,
                                                      rdb_host,
                                                      rdb_port,
                                                      rdb_name,
                                                      rdb_type)
        except TimeoutError:
            msg = f'Timeout ({TIMEOUT}s) reached for Ontop Materialize'
            self._logger.warning(msg)

        return False
class Ontop(Container):
    """Ontop container super class for OntopMaterialize and OntopVirtualize."""

    def __init__(self, name: str, data_path: str, logger: Logger, mode: str):
        """Creates an instance of the Ontop class.

        Parameters
        ----------
        name : str
            Pretty name of the container.
        data_path: str
            Path to the data directory of the case.
        logger : Logger
            Logger to use for log messages.
        mode : str
            Ontop mode: `materialize` or `endpoint`

        Raises
        ------
        ValueError
            If `mode` is neither `materialize` nor `endpoint`.
        """
        self._mode = mode
        self._headers: Dict[str, Dict[str, str]] = {}
        self._logger = logger
        self._data_path = data_path

        # Resolving the mount directory also validates the mode: the
        # property raises ValueError for anything unknown.
        mount_directory = self.root_mount_directory
        os.makedirs(os.path.join(self._data_path, mount_directory),
                    exist_ok=True)

        # Set Java heap to 1/2 of the total memory instead of the default 1/4
        max_heap = psutil.virtual_memory().total // 2

        # Configure logging
        log_level = 'debug' if self._logger.verbose else 'info'
        self._logger.info(f'Initialized Ontop logger at "{log_level}" level')

        environment = {'ONTOP_JAVA_ARGS': f'-Xmx{max_heap} -Xms{max_heap}',
                       'ONTOP_LOG_LEVEL': log_level}
        super().__init__(f'blindreviewing/ontop:v{VERSION}', name,
                         self._logger,
                         ports={'8888': '8888'},
                         environment=environment,
                         volumes=[f'{self._data_path}/'
                                  f'{mount_directory}:/data',
                                  f'{self._data_path}/shared:/data/shared'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Ontop.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Ontop.

        Raises
        ------
        ValueError
            If the Ontop mode is neither `endpoint` nor `materialize`.
        """
        if self._mode == 'endpoint':
            return 'ontopvirtualize'
        elif self._mode == 'materialize':
            return 'ontopmaterialize'
        else:
            raise ValueError(f'Unknown Ontop mode: "{self._mode}"')

    @property
    def endpoint(self) -> str:
        """SPARQL endpoint URL for Ontop.

        Returns
        -------
        url : str
            SPARQL endpoint URL.
        """
        return 'http://localhost:8888/sparql'

    @property
    def headers(self) -> dict:
        """HTTP headers of SPARQL queries for serialization formats.

        Only supported serialization formats are included in the dictionary.
        Currently, the following formats are supported:
        - N-Triples
        - N-Quads
        - Turtle
        - CSV
        - RDF/JSON
        - RDF/XML
        - JSON-LD

        Returns
        -------
        headers : dict
            Dictionary of headers to use for each serialization format.
        """
        return self._headers

    def _execute(self, arguments: list) -> bool:
        """Execute Ontop with given arguments.

        In `endpoint` mode the call returns once the endpoint start-up line
        shows up in the container log; in `materialize` mode it waits for
        the command to exit.

        Parameters
        ----------
        arguments : list
            Arguments to supply to Ontop.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not.
        """
        cmd = f'/ontop/ontop {self._mode} {" ".join(arguments)}'
        self._logger.info(f'Executing Ontop with command: {cmd}')
        if self._mode == 'endpoint':
            log_line = 'OntopEndpointApplication - Started ' + \
                       'OntopEndpointApplication'
            success = self.run_and_wait_for_log(log_line, cmd)
        elif self._mode == 'materialize':
            success = self.run_and_wait_for_exit(cmd)
        else:
            self._logger.error(f'Unknown Ontop mode "{self._mode}"')
            success = False

        return success

    def _write_properties(self,
                          rdb_username: str,
                          rdb_password: str,
                          rdb_host: str,
                          rdb_port: int,
                          rdb_name: str,
                          rdb_type: str) -> None:
        """Write the JDBC `config.properties` file into the mount directory.

        Raises
        ------
        ValueError
            If `rdb_type` is neither `MySQL` nor `PostgreSQL`.
        """
        if rdb_type == 'MySQL':
            scheme, driver = 'mysql', 'com.mysql.cj.jdbc.Driver'
        elif rdb_type == 'PostgreSQL':
            scheme, driver = 'postgresql', 'org.postgresql.Driver'
        else:
            msg = f'Unknown RDB type: "{rdb_type}"'
            self._logger.error(msg)
            raise ValueError(msg)

        # Interpolation is disabled on purpose: with the default
        # BasicInterpolation a '%' in the credentials raises ValueError.
        config = configparser.ConfigParser(interpolation=None)
        config['root'] = {
            'jdbc.url': f'jdbc:{scheme}://{rdb_host}:{rdb_port}/{rdb_name}',
            'jdbc.driver': driver,
            'jdbc.user': rdb_username,
            'jdbc.password': rdb_password,
        }

        path = os.path.join(self._data_path, self.root_mount_directory)
        os.makedirs(path, exist_ok=True)
        properties_path = os.path.join(path, 'config.properties')
        with open(properties_path, 'w') as f:
            config.write(f, space_around_delimiters=False)

        # .properties files are like .ini files but without a [HEADER]
        # Use a [root] header and remove it after writing
        with open(properties_path, 'r') as f:
            data = f.read()

        with open(properties_path, 'w') as f:
            f.write(data.replace('[root]\n', ''))

    def _convert_mapping(self, mapping_file: str) -> bool:
        """Rewrite constant rdf:type PredicateObjectMaps into rr:class.

        The mapping is read from the `shared` directory and the converted
        mapping is stored as `mapping_converted.r2rml.ttl` in the mount
        directory.

        Returns
        -------
        success : bool
            False if an rdf:type PredicateObjectMap cannot be converted.
        """
        # Compatibility with Ontop requiring rr:class
        # Replace any rdf:type construction with rr:class
        # Without this, a strange error is raised: 'The definition of the
        # predicate is not always a ground term triple(s,p,o)'
        g = Graph()
        g.bind('r2rml', R2RML)
        g.bind('rdf', RDF)
        g.parse(os.path.join(self._data_path, 'shared',
                             os.path.basename(mapping_file)))

        # The graph is modified inside the loops, so iterate over snapshots
        # instead of live generators.
        triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
        for triples_map_iri, _, _ in triples_maps:
            subject_map_iri = g.value(triples_map_iri, R2RML.subjectMap)

            poms = list(g.triples((triples_map_iri,
                                   R2RML.predicateObjectMap,
                                   None)))
            for _, _, predicate_object_map_iri in poms:
                predicate_map_iri = g.value(predicate_object_map_iri,
                                            R2RML.predicateMap)
                object_map_iri = g.value(predicate_object_map_iri,
                                         R2RML.objectMap)

                if predicate_map_iri is None or object_map_iri is None:
                    continue

                # Check if PredicateObjectMap is pointing to a PredicateMap
                # specifying rdf:type. Skip this PredicateObjectMap if not
                if g.value(predicate_map_iri, R2RML.constant) != RDF.type:
                    continue

                # Retrieve the ObjectMap rr:constant value and add it as
                # rr:class to the Subject Map
                rdf_type_value = g.value(object_map_iri, R2RML.constant)
                if rdf_type_value is None:
                    msg = 'Cannot extract rr:class value, rdf:type value ' + \
                          'is not a constant value!'
                    self._logger.error(msg)
                    return False

                # Without a Subject Map there is no node to attach rr:class
                # to; adding a triple with a None subject would fail.
                if subject_map_iri is None:
                    msg = 'Cannot add rr:class value, Triples Map has ' + \
                          'no rr:subjectMap!'
                    self._logger.error(msg)
                    return False

                iri = URIRef(rdf_type_value.toPython())
                g.add((subject_map_iri, R2RML['class'], iri))

                # Remove all triples associated with the rdf:type
                # PredicateMap, ObjectMap and PredicateObjectMap
                g.remove((predicate_map_iri, None, None))
                g.remove((object_map_iri, None, None))
                g.remove((predicate_object_map_iri, None, None))

                # Remove PredicateObjectMap from Triples Map
                g.remove((triples_map_iri, R2RML.predicateObjectMap,
                          predicate_object_map_iri))

        destination = os.path.join(self._data_path,
                                   self.root_mount_directory,
                                   'mapping_converted.r2rml.ttl')
        g.serialize(destination=destination, format='turtle')
        return True

    def _execute_mapping(self,
                         config_file: str,
                         arguments: list,
                         mapping_file: str,
                         output_file: Optional[str],
                         rdb_username: str,
                         rdb_password: str,
                         rdb_host: str,
                         rdb_port: int,
                         rdb_name: str,
                         rdb_type: str) -> bool:
        """Execute a mapping file with Ontop.

        Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.

        Parameters
        ----------
        config_file : str
            Name of the generated config file for Ontop. Currently unused:
            the configuration is always written to `config.properties` in
            the mount directory.
        arguments : list
            List of arguments to pass to Ontop. The list is not modified.
        mapping_file : str
            Name of the mapping file to use.
        output_file : Optional[str]
            Name of the output file to use. Only applicable for
            materialization.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        ValueError
            If `rdb_type` is neither `MySQL` nor `PostgreSQL`.
        """
        # Generate configuration file since no CLI is available
        self._write_properties(rdb_username, rdb_password, rdb_host,
                               rdb_port, rdb_name, rdb_type)

        if not self._convert_mapping(mapping_file):
            return False

        # Work on a copy so the caller's list is left untouched
        arguments = [*arguments, '-m', '/data/mapping_converted.r2rml.ttl']
        if output_file is not None:
            arguments += ['-o', os.path.join('/data/shared/', output_file)]
        arguments += ['-p', '/data/config.properties']

        return self._execute(arguments)
Ontop container super class for OntopMaterialize and OntopVirtualize.
30 def __init__(self, name: str, data_path: str, logger: Logger, mode: str): 31 """Creates an instance of the Ontop class. 32 33 Parameters 34 ---------- 35 name : str 36 Pretty name of the container. 37 data_path: str 38 Path to the data directory of the case. 39 logger : Logger 40 Logger to use for log messages. 41 mode : str 42 Ontop mode: `materialize` or `endpoint` 43 """ 44 self._mode = mode 45 self._headers: Dict[str, Dict[str, str]] = {} 46 self._logger = logger 47 self._data_path = data_path 48 49 if self._mode == 'endpoint': 50 subdir = 'ontopvirtualize' 51 elif self._mode == 'materialize': 52 subdir = 'ontopmaterialize' 53 else: 54 raise ValueError(f'Unknown Ontop mode: "{self._mode}"') 55 os.makedirs(os.path.join(self._data_path, subdir), exist_ok=True) 56 57 # Set Java heap to 1/2 of available memory instead of the default 1/4 58 max_heap = int(psutil.virtual_memory().total * (1/2)) 59 60 # Configure logging 61 log_level = 'info' 62 if self._logger.verbose: 63 log_level = 'debug' 64 self._logger.info(f'Initialized Ontop logger at "{log_level}" level') 65 66 environment = {'ONTOP_JAVA_ARGS': f'-Xmx{max_heap} -Xms{max_heap}', 67 'ONTOP_LOG_LEVEL': log_level} 68 super().__init__(f'blindreviewing/ontop:v{VERSION}', name, 69 self._logger, 70 ports={'8888': '8888'}, 71 environment=environment, 72 volumes=[f'{self._data_path}/' 73 f'{self.root_mount_directory}:/data', 74 f'{self._data_path}/shared:/data/shared'])
Creates an instance of the Ontop class.
Parameters
- name (str): Pretty name of the container.
- data_path (str): Path to the data directory of the case.
- logger (Logger): Logger to use for log messages.
- mode (str): Ontop mode: materialize or endpoint.
Subdirectory in the root directory of the case for Ontop.
Returns
- subdirectory (str): Subdirectory of the root directory for Ontop.
HTTP headers of SPARQL queries for serialization formats.
Only supported serialization formats are included in the dictionary. Currently, the following formats are supported:
- N-Triples
- N-Quads
- Turtle
- CSV
- RDF/JSON
- RDF/XML
- JSON-LD
Returns
- headers (dict): Dictionary of headers to use for each serialization format.
Inherited Members
class OntopVirtualize(Ontop):
    """OntopVirtualize container for setting up an Ontop SPARQL endpoint."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the OntopVirtualize class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        absolute_data_path = os.path.abspath(data_path)
        absolute_config_path = os.path.abspath(config_path)
        self._data_path = absolute_data_path
        self._config_path = absolute_config_path
        self._logger = Logger(__name__, directory, verbose)
        super().__init__('Ontop-Virtualize', absolute_data_path,
                         self._logger, 'endpoint')

    def execute_mapping(self,
                        mapping_file: str,
                        serialization: str,
                        rdb_username: str,
                        rdb_password: str,
                        rdb_host: str,
                        rdb_port: int,
                        rdb_name: str,
                        rdb_type: str) -> bool:
        """Start an Ontop SPARQL endpoint with a mapping.

        Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.
        The following serialization keys are accepted:
        - `ntriples`: N-Triples (Ontop v5+)
        - `nquads`: N-Quads (Ontop v5+)
        - `turtle`: Turtle
        - `rdfjson`: RDF/JSON
        - `rdfxml`: RDF/XML
        - `jsonld`: JSON-LD
        - `csv`: CSV

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        serialization : str
            Serialization format to use.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        ValueError
            If `serialization` is not one of the accepted keys.
        """
        config_file = f'{self._data_path}/{self.root_mount_directory}' + \
                      '/config.properties'
        arguments = ['--cors-allowed-origins=*', '--port=8888']

        # Register the Accept header for every format the endpoint serves
        self._headers.update({
            'ntriples': {'Accept': 'application/n-triples'},
            'nquads': {'Accept': 'application/n-quads'},
            'turtle': {'Accept': 'text/turtle'},
            'rdfjson': {'Accept': 'application/rdf+json'},
            'rdfxml': {'Accept': 'application/rdf+xml'},
            'jsonld': {'Accept': 'application/ld+json'},
            'csv': {'Accept': 'text/csv'},
        })

        if serialization in self._headers:
            return super()._execute_mapping(config_file, arguments,
                                            mapping_file, None,
                                            rdb_username, rdb_password,
                                            rdb_host, rdb_port,
                                            rdb_name, rdb_type)

        msg = 'Unsupported serialization format ' + \
              f'"{serialization}" for Ontop'
        self._logger.error(msg)
        raise ValueError(msg)
OntopVirtualize container for setting up an Ontop SPARQL endpoint.
307 def __init__(self, data_path: str, config_path: str, directory: str, 308 verbose: bool): 309 """Creates an instance of the OntopVirtualize class. 310 311 Parameters 312 ---------- 313 data_path : str 314 Path to the data directory of the case. 315 config_path : str 316 Path to the config directory of the case. 317 directory : str 318 Path to the directory to store logs. 319 verbose : bool 320 Enable verbose logs. 321 """ 322 self._data_path = os.path.abspath(data_path) 323 self._config_path = os.path.abspath(config_path) 324 self._logger = Logger(__name__, directory, verbose) 325 super().__init__('Ontop-Virtualize', self._data_path, self._logger, 326 'endpoint')
Creates an instance of the OntopVirtualize class.
Parameters
- data_path (str): Path to the data directory of the case.
- config_path (str): Path to the config directory of the case.
- directory (str): Path to the directory to store logs.
- verbose (bool): Enable verbose logs.
328 def execute_mapping(self, 329 mapping_file: str, 330 serialization: str, 331 rdb_username: str, 332 rdb_password: str, 333 rdb_host: str, 334 rdb_port: int, 335 rdb_name: str, 336 rdb_type: str) -> bool: 337 """Start an Ontop SPARQL endpoint with a mapping. 338 339 Only relational databases are supported by 340 Ontop, thus the relational database parameters are mandantory. 341 Ontop SPARQL endpoint supports the following serialization formats: 342 - N-Triples (Ontop v5+) 343 - N-Quads (Ontop v5+) 344 - Turtle 345 - RDF/JSON 346 - JSON-LD 347 - CSV 348 349 Parameters 350 ---------- 351 mapping_file : str 352 Path to the mapping file to execute. 353 serialization : str 354 Serialization format to use. 355 rdb_username : str 356 Username for the database. 357 rdb_password : str 358 Password for the database. 359 rdb_host : str 360 Hostname for the database. 361 rdb_port : int 362 Port for the database. 363 rdb_name : str 364 Database name for the database. 365 rdb_type : str 366 Database type. 367 368 Returns 369 ------- 370 success : bool 371 Whether the execution was successfull or not. 
372 """ 373 config_file = f'{self._data_path}/{self.root_mount_directory}' + \ 374 '/config.properties' 375 arguments = ['--cors-allowed-origins=*', '--port=8888'] 376 self._headers['ntriples'] = {'Accept': 'application/n-triples'} 377 self._headers['nquads'] = {'Accept': 'application/n-quads'} 378 self._headers['turtle'] = {'Accept': 'text/turtle'} 379 self._headers['rdfjson'] = {'Accept': 'application/rdf+json'} 380 self._headers['rdfxml'] = {'Accept': 'application/rdf+xml'} 381 self._headers['jsonld'] = {'Accept': 'application/ld+json'} 382 self._headers['csv'] = {'Accept': 'text/csv'} 383 if serialization not in self._headers.keys(): 384 msg = 'Unsupported serialization format ' + \ 385 f'"{serialization}" for Ontop' 386 self._logger.error(msg) 387 raise ValueError(msg) 388 return super()._execute_mapping(config_file, arguments, 389 mapping_file, None, rdb_username, 390 rdb_password, rdb_host, rdb_port, 391 rdb_name, rdb_type)
Start an Ontop SPARQL endpoint with a mapping.
Only relational databases are supported by Ontop, thus the relational database parameters are mandatory. The Ontop SPARQL endpoint supports the following serialization formats:
- N-Triples (Ontop v5+)
- N-Quads (Ontop v5+)
- Turtle
- RDF/JSON
- RDF/XML
- JSON-LD
- CSV
Parameters
- mapping_file (str): Path to the mapping file to execute.
- serialization (str): Serialization format to use.
- rdb_username (str): Username for the database.
- rdb_password (str): Password for the database.
- rdb_host (str): Hostname for the database.
- rdb_port (int): Port for the database.
- rdb_name (str): Database name for the database.
- rdb_type (str): Database type.
Returns
- success (bool): Whether the execution was successful or not.
class OntopMaterialize(Ontop):
    """Ontop container in `materialize` mode to execute R2RML mappings."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Set up the paths, logger and container for Ontop materialize.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        # Paths are stored in absolute form.
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        # Make sure the Ontop materialize directory of the case exists.
        work_directory = os.path.join(self._data_path, 'ontopmaterialize')
        os.makedirs(work_directory, exist_ok=True)

        super().__init__('Ontop-Materialize', self._data_path, self._logger,
                         'materialize')

    @timeout(TIMEOUT)
    def _execute_mapping_with_timeout(self, mapping_file: str,
                                      output_file: str,
                                      serialization: str,
                                      rdb_username: str,
                                      rdb_password: str,
                                      rdb_host: str,
                                      rdb_port: int,
                                      rdb_name: str,
                                      rdb_type: str) -> bool:
        """Run the mapping, guarded by the `TIMEOUT` decorator.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        properties_path = (f'{self._data_path}/{self.root_mount_directory}'
                           '/config.properties')
        # The serialization format is handed over through the `-f` argument.
        cli_arguments = ['-f', serialization]
        # The HTTP headers are reset to an empty dictionary.
        self._headers = {}
        rdb_settings = (rdb_username, rdb_password, rdb_host, rdb_port,
                        rdb_name, rdb_type)
        return super()._execute_mapping(properties_path, cli_arguments,
                                        mapping_file, output_file,
                                        *rdb_settings)

    def execute_mapping(self,
                        mapping_file: str,
                        output_file: str,
                        serialization: str,
                        rdb_username: str,
                        rdb_password: str,
                        rdb_host: str,
                        rdb_port: int,
                        rdb_name: str,
                        rdb_type: str) -> bool:
        """Execute a R2RML mapping with Ontop.

        N-Quads and N-Triples are currently supported as serialization
        for Ontop materialize. Only relational databases are supported by
        Ontop, thus the relational database parameters are mandatory.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in. This is not used
            for OntopVirtualize.
        serialization : str
            Serialization format to use.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type.

        Returns
        -------
        success : bool
            Whether the execution was successful or not. `False` is also
            returned when the timeout is reached.
        """
        rdb_settings = (rdb_username, rdb_password, rdb_host, rdb_port,
                        rdb_name, rdb_type)
        try:
            success = self._execute_mapping_with_timeout(mapping_file,
                                                         output_file,
                                                         serialization,
                                                         *rdb_settings)
        except TimeoutError:
            # A timeout is logged as a warning and reported as a failure.
            self._logger.warning(
                f'Timeout ({TIMEOUT}s) reached for Ontop Materialize')
            return False

        return success
OntopMaterialize container to execute a R2RML mapping.
def __init__(self, data_path: str, config_path: str, directory: str,
             verbose: bool):
    """Set up the paths, logger and container for Ontop materialize.

    Parameters
    ----------
    data_path : str
        Path to the data directory of the case.
    config_path : str
        Path to the config directory of the case.
    directory : str
        Path to the directory to store logs.
    verbose : bool
        Enable verbose logs.
    """
    # Paths are stored in absolute form.
    self._data_path = os.path.abspath(data_path)
    self._config_path = os.path.abspath(config_path)
    self._logger = Logger(__name__, directory, verbose)

    # Make sure the Ontop materialize directory of the case exists.
    work_directory = os.path.join(self._data_path, 'ontopmaterialize')
    os.makedirs(work_directory, exist_ok=True)

    super().__init__('Ontop-Materialize', self._data_path, self._logger,
                     'materialize')
Creates an instance of the OntopMaterialize class.
Parameters
- data_path (str): Path to the data directory of the case.
- config_path (str): Path to the config directory of the case.
- directory (str): Path to the directory to store logs.
- verbose (bool): Enable verbose logs.
def execute_mapping(self,
                    mapping_file: str,
                    output_file: str,
                    serialization: str,
                    rdb_username: str,
                    rdb_password: str,
                    rdb_host: str,
                    rdb_port: int,
                    rdb_name: str,
                    rdb_type: str) -> bool:
    """Execute a R2RML mapping with Ontop.

    N-Quads and N-Triples are currently supported as serialization
    for Ontop materialize. Only relational databases are supported by
    Ontop, thus the relational database parameters are mandatory.

    Parameters
    ----------
    mapping_file : str
        Path to the mapping file to execute.
    output_file : str
        Name of the output file to store the triples in. This is not used
        for OntopVirtualize.
    serialization : str
        Serialization format to use.
    rdb_username : str
        Username for the database.
    rdb_password : str
        Password for the database.
    rdb_host : str
        Hostname for the database.
    rdb_port : int
        Port for the database.
    rdb_name : str
        Database name for the database.
    rdb_type : str
        Database type.

    Returns
    -------
    success : bool
        Whether the execution was successful or not. `False` is also
        returned when the timeout is reached.
    """
    rdb_settings = (rdb_username, rdb_password, rdb_host, rdb_port,
                    rdb_name, rdb_type)
    try:
        success = self._execute_mapping_with_timeout(mapping_file,
                                                     output_file,
                                                     serialization,
                                                     *rdb_settings)
    except TimeoutError:
        # A timeout is logged as a warning and reported as a failure.
        self._logger.warning(
            f'Timeout ({TIMEOUT}s) reached for Ontop Materialize')
        return False

    return success
Execute a R2RML mapping with Ontop
N-Quads and N-Triples are currently supported as serialization for Ontop materialize. Only relational databases are supported by Ontop, thus the relational database parameters are mandatory.
Parameters
- mapping_file (str): Path to the mapping file to execute.
- output_file (str): Name of the output file to store the triples in. This is not used for OntopVirtualize.
- serialization (str): Serialization format to use.
- rdb_username (str): Username for the database.
- rdb_password (str): Password for the database.
- rdb_host (str): Hostname for the database.
- rdb_port (int): Port for the database.
- rdb_name (str): Database name for the database.
- rdb_type (str): Database type.
Returns
- success (bool): Whether the execution was successful or not.