bench_executor.morphrdb
Morph-RDB (formerly called ODEMapster) is an RDB2RDF engine developed by the Ontology Engineering Group, which follows the R2RML specification.
Website: https://oeg.fi.upm.es/index.php/en/technologies/315-morph-rdb/index.html
Repository: https://github.com/oeg-upm/morph-rdb
#!/usr/bin/env python3

"""
Morph-RDB (formerly called ODEMapster) is an RDB2RDF engine developed by
the Ontology Engineering Group, which follows the R2RML specification.

**Website**: https://oeg.fi.upm.es/index.php/en/technologies/315-morph-rdb/index.html <br>
**Repository**: https://github.com/oeg-upm/morph-rdb
"""  # noqa: E501

import os
import psutil
import configparser
from timeout_decorator import timeout, TimeoutError  # type: ignore
from bench_executor.container import Container
from bench_executor.logger import Logger

VERSION = '3.12.5'
TIMEOUT = 6 * 3600  # 6 hours


class MorphRDB(Container):
    """Morph-RDB container for executing R2RML mappings."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the MorphRDB class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        # Clear the process umask before creating the directory that is
        # mounted into the container (presumably so the container user can
        # write to it -- TODO confirm).
        os.umask(0)
        os.makedirs(os.path.join(self._data_path, 'morphrdb'), exist_ok=True)
        super().__init__(f'blindreviewing/morph-rdb:v{VERSION}', 'Morph-RDB',
                         self._logger,
                         volumes=[f'{self._data_path}/shared:/data/shared',
                                  f'{self._data_path}/morphrdb:/data'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Morph-RDB.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Morph-RDB.

        """
        return __name__.lower()

    @timeout(TIMEOUT)
    def _execute_with_timeout(self, arguments) -> bool:
        """Execute a mapping with a provided timeout.

        Parameters
        ----------
        arguments : list
            Currently unused; the command line below is fixed.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        # Set Java heap to 1/2 of the total memory instead of the default 1/4
        max_heap = psutil.virtual_memory().total // 2

        # Execute command
        # NOTE(review): the jar name is pinned to 3.12.6 while VERSION is
        # 3.12.5 -- confirm this matches the contents of the image.
        cmd = (f'java -Xmx{max_heap} -Xms{max_heap} '
               '-cp .:morph-rdb-dist-3.12.6.jar:dependency/* '
               'es.upm.fi.dia.oeg.morph.r2rml.rdb.engine.MorphRDBRunner '
               '/data config.properties')
        success = self.run_and_wait_for_exit(cmd)

        return success

    def execute(self, arguments: list) -> bool:
        """Execute Morph-RDB with the given arguments.

        Parameters
        ----------
        arguments : list
            Additional arguments to supply to Morph-RDB.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not. False is also returned
            when the timeout is reached.
        """
        try:
            return self._execute_with_timeout(arguments)
        except TimeoutError:
            msg = f'Timeout ({TIMEOUT}s) reached for Morph-RDB'
            self._logger.warning(msg)

        return False

    def execute_mapping(self, mapping_file: str, output_file: str,
                        serialization: str, rdb_username: str,
                        rdb_password: str, rdb_host: str,
                        rdb_port: int, rdb_name: str,
                        rdb_type: str) -> bool:
        """Execute a mapping file with Morph-RDB.

        N-Quads and N-Triples are currently supported as serialization
        formats for Morph-RDB. Only relational databases are supported by
        Morph-RDB, thus the relational database parameters are mandatory.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in.
        serialization : str
            Serialization format to use: 'nquads' or 'ntriples'.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type: 'MySQL' or 'PostgreSQL'.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        NotImplementedError
            If the serialization format is not supported.
        ValueError
            If the database type is unknown.
        """

        if serialization == 'nquads':
            serialization = 'N-QUADS'
        elif serialization == 'ntriples':
            serialization = 'N-TRIPLE'
        else:
            raise NotImplementedError('Unsupported serialization: '
                                      f'"{serialization}"')

        # Generate INI configuration file since no CLI is available.
        # Interpolation is disabled so values containing '%' (e.g. a
        # password) are accepted and written verbatim.
        config = configparser.ConfigParser(interpolation=None)
        mapping_file = os.path.join('shared', os.path.basename(mapping_file))
        output_file = os.path.join('shared', os.path.basename(output_file))
        config['root'] = {
            'mappingdocument.file.path': mapping_file,
            'output.file.path': output_file,
            'output.rdflanguage': serialization,
        }
        root = config['root']

        root['database.name[0]'] = rdb_name
        if rdb_type == 'MySQL':
            root['database.driver[0]'] = 'com.mysql.jdbc.Driver'
            root['database.type[0]'] = 'mysql'
            dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
                  '?allowPublicKeyRetrieval=true&useSSL=false'
            root['database.url[0]'] = dsn
        elif rdb_type == 'PostgreSQL':
            root['database.driver[0]'] = 'org.postgresql.Driver'
            root['database.type[0]'] = 'postgresql'
            dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
            root['database.url[0]'] = dsn
        else:
            raise ValueError(f'Unknown RDB type: "{rdb_type}"')
        root['database.user[0]'] = rdb_username
        root['database.pwd[0]'] = rdb_password
        root['no_of_database'] = '1'

        path = os.path.join(self._data_path, 'morphrdb')
        config_file = os.path.join(path, 'config.properties')
        os.umask(0)
        os.makedirs(path, exist_ok=True)
        with open(config_file, 'w') as f:
            config.write(f, space_around_delimiters=False)

        # .properties files are like .ini files but without a [HEADER]
        # Use a [root] header and remove it after writing
        with open(config_file, 'r') as f:
            data = f.read()

        # Only strip the first occurrence (the header itself) so a value
        # that happens to end with '[root]' is left intact.
        with open(config_file, 'w') as f:
            f.write(data.replace('[root]\n', '', 1))

        return self.execute([])
class MorphRDB(Container):
    """Morph-RDB container for executing R2RML mappings."""

    def __init__(self, data_path: str, config_path: str, directory: str,
                 verbose: bool):
        """Creates an instance of the MorphRDB class.

        Parameters
        ----------
        data_path : str
            Path to the data directory of the case.
        config_path : str
            Path to the config directory of the case.
        directory : str
            Path to the directory to store logs.
        verbose : bool
            Enable verbose logs.
        """
        self._data_path = os.path.abspath(data_path)
        self._config_path = os.path.abspath(config_path)
        self._logger = Logger(__name__, directory, verbose)

        # Clear the process umask before creating the directory that is
        # mounted into the container (presumably so the container user can
        # write to it -- TODO confirm).
        os.umask(0)
        os.makedirs(os.path.join(self._data_path, 'morphrdb'), exist_ok=True)
        super().__init__(f'blindreviewing/morph-rdb:v{VERSION}', 'Morph-RDB',
                         self._logger,
                         volumes=[f'{self._data_path}/shared:/data/shared',
                                  f'{self._data_path}/morphrdb:/data'])

    @property
    def root_mount_directory(self) -> str:
        """Subdirectory in the root directory of the case for Morph-RDB.

        Returns
        -------
        subdirectory : str
            Subdirectory of the root directory for Morph-RDB.

        """
        return __name__.lower()

    @timeout(TIMEOUT)
    def _execute_with_timeout(self, arguments) -> bool:
        """Execute a mapping with a provided timeout.

        Parameters
        ----------
        arguments : list
            Currently unused; the command line below is fixed.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.
        """
        # Set Java heap to 1/2 of the total memory instead of the default 1/4
        max_heap = psutil.virtual_memory().total // 2

        # Execute command
        # NOTE(review): the jar name is pinned to 3.12.6 while VERSION is
        # 3.12.5 -- confirm this matches the contents of the image.
        cmd = (f'java -Xmx{max_heap} -Xms{max_heap} '
               '-cp .:morph-rdb-dist-3.12.6.jar:dependency/* '
               'es.upm.fi.dia.oeg.morph.r2rml.rdb.engine.MorphRDBRunner '
               '/data config.properties')
        success = self.run_and_wait_for_exit(cmd)

        return success

    def execute(self, arguments: list) -> bool:
        """Execute Morph-RDB with the given arguments.

        Parameters
        ----------
        arguments : list
            Additional arguments to supply to Morph-RDB.

        Returns
        -------
        success : bool
            Whether the execution succeeded or not. False is also returned
            when the timeout is reached.
        """
        try:
            return self._execute_with_timeout(arguments)
        except TimeoutError:
            msg = f'Timeout ({TIMEOUT}s) reached for Morph-RDB'
            self._logger.warning(msg)

        return False

    def execute_mapping(self, mapping_file: str, output_file: str,
                        serialization: str, rdb_username: str,
                        rdb_password: str, rdb_host: str,
                        rdb_port: int, rdb_name: str,
                        rdb_type: str) -> bool:
        """Execute a mapping file with Morph-RDB.

        N-Quads and N-Triples are currently supported as serialization
        formats for Morph-RDB. Only relational databases are supported by
        Morph-RDB, thus the relational database parameters are mandatory.

        Parameters
        ----------
        mapping_file : str
            Path to the mapping file to execute.
        output_file : str
            Name of the output file to store the triples in.
        serialization : str
            Serialization format to use: 'nquads' or 'ntriples'.
        rdb_username : str
            Username for the database.
        rdb_password : str
            Password for the database.
        rdb_host : str
            Hostname for the database.
        rdb_port : int
            Port for the database.
        rdb_name : str
            Database name for the database.
        rdb_type : str
            Database type: 'MySQL' or 'PostgreSQL'.

        Returns
        -------
        success : bool
            Whether the execution was successful or not.

        Raises
        ------
        NotImplementedError
            If the serialization format is not supported.
        ValueError
            If the database type is unknown.
        """

        if serialization == 'nquads':
            serialization = 'N-QUADS'
        elif serialization == 'ntriples':
            serialization = 'N-TRIPLE'
        else:
            raise NotImplementedError('Unsupported serialization: '
                                      f'"{serialization}"')

        # Generate INI configuration file since no CLI is available.
        # Interpolation is disabled so values containing '%' (e.g. a
        # password) are accepted and written verbatim.
        config = configparser.ConfigParser(interpolation=None)
        mapping_file = os.path.join('shared', os.path.basename(mapping_file))
        output_file = os.path.join('shared', os.path.basename(output_file))
        config['root'] = {
            'mappingdocument.file.path': mapping_file,
            'output.file.path': output_file,
            'output.rdflanguage': serialization,
        }
        root = config['root']

        root['database.name[0]'] = rdb_name
        if rdb_type == 'MySQL':
            root['database.driver[0]'] = 'com.mysql.jdbc.Driver'
            root['database.type[0]'] = 'mysql'
            dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
                  '?allowPublicKeyRetrieval=true&useSSL=false'
            root['database.url[0]'] = dsn
        elif rdb_type == 'PostgreSQL':
            root['database.driver[0]'] = 'org.postgresql.Driver'
            root['database.type[0]'] = 'postgresql'
            dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
            root['database.url[0]'] = dsn
        else:
            raise ValueError(f'Unknown RDB type: "{rdb_type}"')
        root['database.user[0]'] = rdb_username
        root['database.pwd[0]'] = rdb_password
        root['no_of_database'] = '1'

        path = os.path.join(self._data_path, 'morphrdb')
        config_file = os.path.join(path, 'config.properties')
        os.umask(0)
        os.makedirs(path, exist_ok=True)
        with open(config_file, 'w') as f:
            config.write(f, space_around_delimiters=False)

        # .properties files are like .ini files but without a [HEADER]
        # Use a [root] header and remove it after writing
        with open(config_file, 'r') as f:
            data = f.read()

        # Only strip the first occurrence (the header itself) so a value
        # that happens to end with '[root]' is left intact.
        with open(config_file, 'w') as f:
            f.write(data.replace('[root]\n', '', 1))

        return self.execute([])
Morph-RDB container for executing R2RML mappings.
MorphRDB(data_path: str, config_path: str, directory: str, verbose: bool)
def __init__(self, data_path: str, config_path: str, directory: str,
             verbose: bool):
    """Set up the Morph-RDB container for a single case.

    Parameters
    ----------
    data_path : str
        Path to the data directory of the case.
    config_path : str
        Path to the config directory of the case.
    directory : str
        Path to the directory to store logs.
    verbose : bool
        Enable verbose logs.
    """
    self._data_path = os.path.abspath(data_path)
    self._config_path = os.path.abspath(config_path)
    self._logger = Logger(__name__, directory, verbose)

    # The umask is cleared before the engine directory is created; that
    # directory is mounted as /data in the container below.
    os.umask(0)
    engine_dir = os.path.join(self._data_path, 'morphrdb')
    os.makedirs(engine_dir, exist_ok=True)

    image = f'blindreviewing/morph-rdb:v{VERSION}'
    mounts = [f'{self._data_path}/shared:/data/shared',
              f'{self._data_path}/morphrdb:/data']
    super().__init__(image, 'Morph-RDB', self._logger, volumes=mounts)
Creates an instance of the MorphRDB class.
Parameters
- data_path (str): Path to the data directory of the case.
- config_path (str): Path to the config directory of the case.
- directory (str): Path to the directory to store logs.
- verbose (bool): Enable verbose logs.
root_mount_directory: str
Subdirectory in the root directory of the case for Morph-RDB.
Returns
- subdirectory (str): Subdirectory of the root directory for Morph-RDB.
def
execute(self, arguments: list) -> bool:
def execute(self, arguments: list) -> bool:
    """Run Morph-RDB and report whether it finished successfully.

    Parameters
    ----------
    arguments : list
        Additional arguments, passed on to `_execute_with_timeout`.

    Returns
    -------
    success : bool
        Result of the execution; False when the timeout was reached.
    """
    try:
        success = self._execute_with_timeout(arguments)
    except TimeoutError:
        # A timeout is logged as a warning and reported as a failure
        self._logger.warning(f'Timeout ({TIMEOUT}s) reached for Morph-RDB')
        success = False

    return success
Execute Morph-RDB with the given arguments.
Parameters
- arguments (list): Additional arguments to supply to Morph-RDB.
Returns
- success (bool): Whether the execution succeeded or not.
def
execute_mapping( self, mapping_file: str, output_file: str, serialization: str, rdb_username: str, rdb_password: str, rdb_host: str, rdb_port: int, rdb_name: str, rdb_type: str) -> bool:
def execute_mapping(self, mapping_file: str, output_file: str,
                    serialization: str, rdb_username: str,
                    rdb_password: str, rdb_host: str,
                    rdb_port: int, rdb_name: str,
                    rdb_type: str) -> bool:
    """Execute a mapping file with Morph-RDB.

    N-Quads and N-Triples are currently supported as serialization
    formats for Morph-RDB. Only relational databases are supported by
    Morph-RDB, thus the relational database parameters are mandatory.

    Parameters
    ----------
    mapping_file : str
        Path to the mapping file to execute.
    output_file : str
        Name of the output file to store the triples in.
    serialization : str
        Serialization format to use: 'nquads' or 'ntriples'.
    rdb_username : str
        Username for the database.
    rdb_password : str
        Password for the database.
    rdb_host : str
        Hostname for the database.
    rdb_port : int
        Port for the database.
    rdb_name : str
        Database name for the database.
    rdb_type : str
        Database type: 'MySQL' or 'PostgreSQL'.

    Returns
    -------
    success : bool
        Whether the execution was successful or not.

    Raises
    ------
    NotImplementedError
        If the serialization format is not supported.
    ValueError
        If the database type is unknown.
    """

    if serialization == 'nquads':
        serialization = 'N-QUADS'
    elif serialization == 'ntriples':
        serialization = 'N-TRIPLE'
    else:
        raise NotImplementedError('Unsupported serialization: '
                                  f'"{serialization}"')

    # Generate INI configuration file since no CLI is available.
    # Interpolation is disabled so values containing '%' (e.g. a
    # password) are accepted and written verbatim.
    config = configparser.ConfigParser(interpolation=None)
    # Only the base names are kept; both files are referenced relative to
    # the 'shared' directory.
    mapping_file = os.path.join('shared', os.path.basename(mapping_file))
    output_file = os.path.join('shared', os.path.basename(output_file))
    config['root'] = {
        'mappingdocument.file.path': mapping_file,
        'output.file.path': output_file,
        'output.rdflanguage': serialization,
    }
    root = config['root']

    root['database.name[0]'] = rdb_name
    if rdb_type == 'MySQL':
        root['database.driver[0]'] = 'com.mysql.jdbc.Driver'
        root['database.type[0]'] = 'mysql'
        dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
              '?allowPublicKeyRetrieval=true&useSSL=false'
        root['database.url[0]'] = dsn
    elif rdb_type == 'PostgreSQL':
        root['database.driver[0]'] = 'org.postgresql.Driver'
        root['database.type[0]'] = 'postgresql'
        dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
        root['database.url[0]'] = dsn
    else:
        raise ValueError(f'Unknown RDB type: "{rdb_type}"')
    root['database.user[0]'] = rdb_username
    root['database.pwd[0]'] = rdb_password
    root['no_of_database'] = '1'

    path = os.path.join(self._data_path, 'morphrdb')
    config_file = os.path.join(path, 'config.properties')
    os.umask(0)
    os.makedirs(path, exist_ok=True)
    with open(config_file, 'w') as f:
        config.write(f, space_around_delimiters=False)

    # .properties files are like .ini files but without a [HEADER]
    # Use a [root] header and remove it after writing
    with open(config_file, 'r') as f:
        data = f.read()

    # Only strip the first occurrence (the header itself) so a value that
    # happens to end with '[root]' is left intact.
    with open(config_file, 'w') as f:
        f.write(data.replace('[root]\n', '', 1))

    return self.execute([])
Execute a mapping file with Morph-RDB.
N-Quads and N-Triples are currently supported as serialization formats for Morph-RDB. Only relational databases are supported by Morph-RDB, thus the relational database parameters are mandatory.
Parameters
- mapping_file (str): Path to the mapping file to execute.
- output_file (str): Name of the output file to store the triples in.
- serialization (str): Serialization format to use.
- rdb_username (str): Username for the database.
- rdb_password (str): Password for the database.
- rdb_host (str): Hostname for the database.
- rdb_port (int): Port for the database.
- rdb_name (str): Database name for the database.
- rdb_type (str): Database type.
Returns
- success (bool): Whether the execution was successful or not.