bench_executor.morphrdb

Morph-RDB (formerly called ODEMapster) is an RDB2RDF engine developed by the Ontology Engineering Group, which follows the R2RML specification.

Website: https://oeg.fi.upm.es/index.php/en/technologies/315-morph-rdb/index.html
Repository: https://github.com/oeg-upm/morph-rdb

  1#!/usr/bin/env python3
  2
  3"""
  4Morph-RDB (formerly called ODEMapster) is an RDB2RDF engine developed by
  5the Ontology Engineering Group, which follows the R2RML specification.
  6
  7**Website**: https://oeg.fi.upm.es/index.php/en/technologies/315-morph-rdb/index.html <br>
  8**Repository**: https://github.com/oeg-upm/morph-rdb
  9"""  # noqa: E501
 10
 11import os
 12import psutil
 13import configparser
 14from timeout_decorator import timeout, TimeoutError  # type: ignore
 15from bench_executor.container import Container
 16from bench_executor.logger import Logger
 17
 18VERSION = '3.12.5'
 19TIMEOUT = 6 * 3600  # 6 hours
 20
 21
 22class MorphRDB(Container):
 23    """Morph-RDB container for executing R2RML mappings."""
 24    def __init__(self, data_path: str, config_path: str, directory: str,
 25                 verbose: bool):
 26        """Creates an instance of the MorphRDB class.
 27
 28        Parameters
 29        ----------
 30        data_path : str
 31            Path to the data directory of the case.
 32        config_path : str
 33            Path to the config directory of the case.
 34        directory : str
 35            Path to the directory to store logs.
 36        verbose : bool
 37            Enable verbose logs.
 38        """
 39        self._data_path = os.path.abspath(data_path)
 40        self._config_path = os.path.abspath(config_path)
 41        self._logger = Logger(__name__, directory, verbose)
 42
 43        os.umask(0)
 44        os.makedirs(os.path.join(self._data_path, 'morphrdb'), exist_ok=True)
 45        super().__init__(f'blindreviewing/morph-rdb:v{VERSION}', 'Morph-RDB',
 46                         self._logger,
 47                         volumes=[f'{self._data_path}/shared:/data/shared',
 48                                  f'{self._data_path}/morphrdb:/data'])
 49
 50    @property
 51    def root_mount_directory(self) -> str:
 52        """Subdirectory in the root directory of the case for Morph-RDB.
 53
 54        Returns
 55        -------
 56        subdirectory : str
 57            Subdirectory of the root directory for Morph-RDB.
 58
 59        """
 60        return __name__.lower()
 61
 62    @timeout(TIMEOUT)
 63    def _execute_with_timeout(self, arguments) -> bool:
 64        """Execute a mapping with a provided timeout.
 65
 66        Returns
 67        -------
 68        success : bool
 69            Whether the execution was successfull or not.
 70        """
 71        # Set Java heap to 1/2 of available memory instead of the default 1/4
 72        max_heap = int(psutil.virtual_memory().total * (1/2))
 73
 74        # Execute command
 75        cmd = f'java -Xmx{max_heap} -Xms{max_heap} ' + \
 76              '-cp .:morph-rdb-dist-3.12.6.jar:dependency/* ' + \
 77              'es.upm.fi.dia.oeg.morph.r2rml.rdb.engine.MorphRDBRunner ' + \
 78              '/data config.properties'
 79        success = self.run_and_wait_for_exit(cmd)
 80
 81        return success
 82
 83    def execute(self, arguments: list) -> bool:
 84        """Execute Morph-KGC with the given arguments.
 85
 86        Parameters
 87        ----------
 88        arguments : list
 89            Additional arguments to supply to Morph-KGC.
 90
 91        Returns
 92        -------
 93        success : bool
 94            Whether the execution succeeded or not.
 95        """
 96        try:
 97            return self._execute_with_timeout(arguments)
 98        except TimeoutError:
 99            msg = f'Timeout ({TIMEOUT}s) reached for Morph-RDB'
100            self._logger.warning(msg)
101
102        return False
103
104    def execute_mapping(self, mapping_file: str, output_file: str,
105                        serialization: str, rdb_username: str,
106                        rdb_password: str, rdb_host: str,
107                        rdb_port: int, rdb_name: str,
108                        rdb_type: str) -> bool:
109        """Execute a mapping file with Morph-RDB.
110
111        N-Quads and N-Triples are currently supported as serialization
112        format for Morph-RDB. Only relational databases are supported by
113        Morph-RDB, thus the relational database parameters are mandantory.
114
115        Parameters
116        ----------
117        mapping_file : str
118            Path to the mapping file to execute.
119        output_file : str
120            Name of the output file to store the triples in.
121        serialization : str
122            Serialization format to use.
123        rdb_username : str
124            Username for the database.
125        rdb_password : str
126            Password for the database.
127        rdb_host : str
128            Hostname for the database.
129        rdb_port : int
130            Port for the database.
131        rdb_name : str
132            Database name for the database.
133        rdb_type : str
134            Database type.
135
136        Returns
137        -------
138        success : bool
139            Whether the execution was successfull or not.
140        """
141
142        if serialization == 'nquads':
143            serialization = 'N-QUADS'
144        elif serialization == 'ntriples':
145            serialization = 'N-TRIPLE'
146        else:
147            raise NotImplementedError('Unsupported serialization: '
148                                      f'"{serialization}"')
149
150        # Generate INI configuration file since no CLI is available
151        config = configparser.ConfigParser()
152        mapping_file = os.path.join('shared', os.path.basename(mapping_file))
153        output_file = os.path.join('shared', os.path.basename(output_file))
154        config['root'] = {
155            'mappingdocument.file.path': mapping_file,
156            'output.file.path': output_file,
157            'output.rdflanguage': serialization,
158        }
159
160        config['root']['database.name[0]'] = rdb_name
161        if rdb_type == 'MySQL':
162            config['root']['database.driver[0]'] = 'com.mysql.jdbc.Driver'
163            config['root']['database.type[0]'] = 'mysql'
164            dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
165                  '?allowPublicKeyRetrieval=true&useSSL=false'
166            config['root']['database.url[0]'] = dsn
167        elif rdb_type == 'PostgreSQL':
168            config['root']['database.driver[0]'] = 'org.postgresql.Driver'
169            config['root']['database.type[0]'] = 'postgresql'
170            dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
171            config['root']['database.url[0]'] = dsn
172        else:
173            raise ValueError(f'Unknown RDB type: "{rdb_type}"')
174        config['root']['database.user[0]'] = rdb_username
175        config['root']['database.pwd[0]'] = rdb_password
176        config['root']['no_of_database'] = '1'
177
178        path = os.path.join(self._data_path, 'morphrdb')
179        os.umask(0)
180        os.makedirs(path, exist_ok=True)
181        with open(os.path.join(path, 'config.properties'), 'w') as f:
182            config.write(f, space_around_delimiters=False)
183
184        # .properties files are like .ini files but without a [HEADER]
185        # Use a [root] header and remove it after writing
186        with open(os.path.join(path, 'config.properties'), 'r') as f:
187            data = f.read()
188
189        with open(os.path.join(path, 'config.properties'), 'w') as f:
190            f.write(data.replace('[root]\n', ''))
191
192        return self.execute([])
class MorphRDB(bench_executor.container.Container):
 23class MorphRDB(Container):
 24    """Morph-RDB container for executing R2RML mappings."""
 25    def __init__(self, data_path: str, config_path: str, directory: str,
 26                 verbose: bool):
 27        """Creates an instance of the MorphRDB class.
 28
 29        Parameters
 30        ----------
 31        data_path : str
 32            Path to the data directory of the case.
 33        config_path : str
 34            Path to the config directory of the case.
 35        directory : str
 36            Path to the directory to store logs.
 37        verbose : bool
 38            Enable verbose logs.
 39        """
 40        self._data_path = os.path.abspath(data_path)
 41        self._config_path = os.path.abspath(config_path)
 42        self._logger = Logger(__name__, directory, verbose)
 43
 44        os.umask(0)
 45        os.makedirs(os.path.join(self._data_path, 'morphrdb'), exist_ok=True)
 46        super().__init__(f'blindreviewing/morph-rdb:v{VERSION}', 'Morph-RDB',
 47                         self._logger,
 48                         volumes=[f'{self._data_path}/shared:/data/shared',
 49                                  f'{self._data_path}/morphrdb:/data'])
 50
 51    @property
 52    def root_mount_directory(self) -> str:
 53        """Subdirectory in the root directory of the case for Morph-RDB.
 54
 55        Returns
 56        -------
 57        subdirectory : str
 58            Subdirectory of the root directory for Morph-RDB.
 59
 60        """
 61        return __name__.lower()
 62
 63    @timeout(TIMEOUT)
 64    def _execute_with_timeout(self, arguments) -> bool:
 65        """Execute a mapping with a provided timeout.
 66
 67        Returns
 68        -------
 69        success : bool
 70            Whether the execution was successfull or not.
 71        """
 72        # Set Java heap to 1/2 of available memory instead of the default 1/4
 73        max_heap = int(psutil.virtual_memory().total * (1/2))
 74
 75        # Execute command
 76        cmd = f'java -Xmx{max_heap} -Xms{max_heap} ' + \
 77              '-cp .:morph-rdb-dist-3.12.6.jar:dependency/* ' + \
 78              'es.upm.fi.dia.oeg.morph.r2rml.rdb.engine.MorphRDBRunner ' + \
 79              '/data config.properties'
 80        success = self.run_and_wait_for_exit(cmd)
 81
 82        return success
 83
 84    def execute(self, arguments: list) -> bool:
 85        """Execute Morph-KGC with the given arguments.
 86
 87        Parameters
 88        ----------
 89        arguments : list
 90            Additional arguments to supply to Morph-KGC.
 91
 92        Returns
 93        -------
 94        success : bool
 95            Whether the execution succeeded or not.
 96        """
 97        try:
 98            return self._execute_with_timeout(arguments)
 99        except TimeoutError:
100            msg = f'Timeout ({TIMEOUT}s) reached for Morph-RDB'
101            self._logger.warning(msg)
102
103        return False
104
105    def execute_mapping(self, mapping_file: str, output_file: str,
106                        serialization: str, rdb_username: str,
107                        rdb_password: str, rdb_host: str,
108                        rdb_port: int, rdb_name: str,
109                        rdb_type: str) -> bool:
110        """Execute a mapping file with Morph-RDB.
111
112        N-Quads and N-Triples are currently supported as serialization
113        format for Morph-RDB. Only relational databases are supported by
114        Morph-RDB, thus the relational database parameters are mandantory.
115
116        Parameters
117        ----------
118        mapping_file : str
119            Path to the mapping file to execute.
120        output_file : str
121            Name of the output file to store the triples in.
122        serialization : str
123            Serialization format to use.
124        rdb_username : str
125            Username for the database.
126        rdb_password : str
127            Password for the database.
128        rdb_host : str
129            Hostname for the database.
130        rdb_port : int
131            Port for the database.
132        rdb_name : str
133            Database name for the database.
134        rdb_type : str
135            Database type.
136
137        Returns
138        -------
139        success : bool
140            Whether the execution was successfull or not.
141        """
142
143        if serialization == 'nquads':
144            serialization = 'N-QUADS'
145        elif serialization == 'ntriples':
146            serialization = 'N-TRIPLE'
147        else:
148            raise NotImplementedError('Unsupported serialization: '
149                                      f'"{serialization}"')
150
151        # Generate INI configuration file since no CLI is available
152        config = configparser.ConfigParser()
153        mapping_file = os.path.join('shared', os.path.basename(mapping_file))
154        output_file = os.path.join('shared', os.path.basename(output_file))
155        config['root'] = {
156            'mappingdocument.file.path': mapping_file,
157            'output.file.path': output_file,
158            'output.rdflanguage': serialization,
159        }
160
161        config['root']['database.name[0]'] = rdb_name
162        if rdb_type == 'MySQL':
163            config['root']['database.driver[0]'] = 'com.mysql.jdbc.Driver'
164            config['root']['database.type[0]'] = 'mysql'
165            dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
166                  '?allowPublicKeyRetrieval=true&useSSL=false'
167            config['root']['database.url[0]'] = dsn
168        elif rdb_type == 'PostgreSQL':
169            config['root']['database.driver[0]'] = 'org.postgresql.Driver'
170            config['root']['database.type[0]'] = 'postgresql'
171            dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
172            config['root']['database.url[0]'] = dsn
173        else:
174            raise ValueError(f'Unknown RDB type: "{rdb_type}"')
175        config['root']['database.user[0]'] = rdb_username
176        config['root']['database.pwd[0]'] = rdb_password
177        config['root']['no_of_database'] = '1'
178
179        path = os.path.join(self._data_path, 'morphrdb')
180        os.umask(0)
181        os.makedirs(path, exist_ok=True)
182        with open(os.path.join(path, 'config.properties'), 'w') as f:
183            config.write(f, space_around_delimiters=False)
184
185        # .properties files are like .ini files but without a [HEADER]
186        # Use a [root] header and remove it after writing
187        with open(os.path.join(path, 'config.properties'), 'r') as f:
188            data = f.read()
189
190        with open(os.path.join(path, 'config.properties'), 'w') as f:
191            f.write(data.replace('[root]\n', ''))
192
193        return self.execute([])

Morph-RDB container for executing R2RML mappings.

MorphRDB(data_path: str, config_path: str, directory: str, verbose: bool)
25    def __init__(self, data_path: str, config_path: str, directory: str,
26                 verbose: bool):
27        """Creates an instance of the MorphRDB class.
28
29        Parameters
30        ----------
31        data_path : str
32            Path to the data directory of the case.
33        config_path : str
34            Path to the config directory of the case.
35        directory : str
36            Path to the directory to store logs.
37        verbose : bool
38            Enable verbose logs.
39        """
40        self._data_path = os.path.abspath(data_path)
41        self._config_path = os.path.abspath(config_path)
42        self._logger = Logger(__name__, directory, verbose)
43
44        os.umask(0)
45        os.makedirs(os.path.join(self._data_path, 'morphrdb'), exist_ok=True)
46        super().__init__(f'blindreviewing/morph-rdb:v{VERSION}', 'Morph-RDB',
47                         self._logger,
48                         volumes=[f'{self._data_path}/shared:/data/shared',
49                                  f'{self._data_path}/morphrdb:/data'])

Creates an instance of the MorphRDB class.

Parameters
  • data_path (str): Path to the data directory of the case.
  • config_path (str): Path to the config directory of the case.
  • directory (str): Path to the directory to store logs.
  • verbose (bool): Enable verbose logs.
root_mount_directory: str

Subdirectory in the root directory of the case for Morph-RDB.

Returns
  • subdirectory (str): Subdirectory of the root directory for Morph-RDB.
def execute(self, arguments: list) -> bool:
 84    def execute(self, arguments: list) -> bool:
 85        """Execute Morph-KGC with the given arguments.
 86
 87        Parameters
 88        ----------
 89        arguments : list
 90            Additional arguments to supply to Morph-KGC.
 91
 92        Returns
 93        -------
 94        success : bool
 95            Whether the execution succeeded or not.
 96        """
 97        try:
 98            return self._execute_with_timeout(arguments)
 99        except TimeoutError:
100            msg = f'Timeout ({TIMEOUT}s) reached for Morph-RDB'
101            self._logger.warning(msg)
102
103        return False

Execute Morph-RDB with the given arguments.

Parameters
  • arguments (list): Additional arguments to supply to Morph-RDB.
Returns
  • success (bool): Whether the execution succeeded or not.
def execute_mapping( self, mapping_file: str, output_file: str, serialization: str, rdb_username: str, rdb_password: str, rdb_host: str, rdb_port: int, rdb_name: str, rdb_type: str) -> bool:
105    def execute_mapping(self, mapping_file: str, output_file: str,
106                        serialization: str, rdb_username: str,
107                        rdb_password: str, rdb_host: str,
108                        rdb_port: int, rdb_name: str,
109                        rdb_type: str) -> bool:
110        """Execute a mapping file with Morph-RDB.
111
112        N-Quads and N-Triples are currently supported as serialization
113        format for Morph-RDB. Only relational databases are supported by
114        Morph-RDB, thus the relational database parameters are mandantory.
115
116        Parameters
117        ----------
118        mapping_file : str
119            Path to the mapping file to execute.
120        output_file : str
121            Name of the output file to store the triples in.
122        serialization : str
123            Serialization format to use.
124        rdb_username : str
125            Username for the database.
126        rdb_password : str
127            Password for the database.
128        rdb_host : str
129            Hostname for the database.
130        rdb_port : int
131            Port for the database.
132        rdb_name : str
133            Database name for the database.
134        rdb_type : str
135            Database type.
136
137        Returns
138        -------
139        success : bool
140            Whether the execution was successfull or not.
141        """
142
143        if serialization == 'nquads':
144            serialization = 'N-QUADS'
145        elif serialization == 'ntriples':
146            serialization = 'N-TRIPLE'
147        else:
148            raise NotImplementedError('Unsupported serialization: '
149                                      f'"{serialization}"')
150
151        # Generate INI configuration file since no CLI is available
152        config = configparser.ConfigParser()
153        mapping_file = os.path.join('shared', os.path.basename(mapping_file))
154        output_file = os.path.join('shared', os.path.basename(output_file))
155        config['root'] = {
156            'mappingdocument.file.path': mapping_file,
157            'output.file.path': output_file,
158            'output.rdflanguage': serialization,
159        }
160
161        config['root']['database.name[0]'] = rdb_name
162        if rdb_type == 'MySQL':
163            config['root']['database.driver[0]'] = 'com.mysql.jdbc.Driver'
164            config['root']['database.type[0]'] = 'mysql'
165            dsn = f'jdbc:mysql://{rdb_host}:{rdb_port}/{rdb_name}' + \
166                  '?allowPublicKeyRetrieval=true&useSSL=false'
167            config['root']['database.url[0]'] = dsn
168        elif rdb_type == 'PostgreSQL':
169            config['root']['database.driver[0]'] = 'org.postgresql.Driver'
170            config['root']['database.type[0]'] = 'postgresql'
171            dsn = f'jdbc:postgresql://{rdb_host}:{rdb_port}/{rdb_name}'
172            config['root']['database.url[0]'] = dsn
173        else:
174            raise ValueError(f'Unknown RDB type: "{rdb_type}"')
175        config['root']['database.user[0]'] = rdb_username
176        config['root']['database.pwd[0]'] = rdb_password
177        config['root']['no_of_database'] = '1'
178
179        path = os.path.join(self._data_path, 'morphrdb')
180        os.umask(0)
181        os.makedirs(path, exist_ok=True)
182        with open(os.path.join(path, 'config.properties'), 'w') as f:
183            config.write(f, space_around_delimiters=False)
184
185        # .properties files are like .ini files but without a [HEADER]
186        # Use a [root] header and remove it after writing
187        with open(os.path.join(path, 'config.properties'), 'r') as f:
188            data = f.read()
189
190        with open(os.path.join(path, 'config.properties'), 'w') as f:
191            f.write(data.replace('[root]\n', ''))
192
193        return self.execute([])

Execute a mapping file with Morph-RDB.

N-Quads and N-Triples are currently supported as serialization formats for Morph-RDB. Only relational databases are supported by Morph-RDB, thus the relational database parameters are mandatory.

Parameters
  • mapping_file (str): Path to the mapping file to execute.
  • output_file (str): Name of the output file to store the triples in.
  • serialization (str): Serialization format to use.
  • rdb_username (str): Username for the database.
  • rdb_password (str): Password for the database.
  • rdb_host (str): Hostname for the database.
  • rdb_port (int): Port for the database.
  • rdb_name (str): Database name for the database.
  • rdb_type (str): Database type.
Returns
  • success (bool): Whether the execution was successful or not.