bench_executor.sdmrdfizer

SDM-RDFizer is an efficient RML-Compliant engine for knowledge graph construction.

Repository: https://github.com/SDM-TIB/SDM-RDFizer

  1#!/usr/bin/env python3
  2
  3"""
  4SDM-RDFizer is an efficient RML-Compliant engine for knowledge graph
  5construction.
  6
  7**Repository**: https://github.com/SDM-TIB/SDM-RDFizer
  8"""
  9
 10import os
 11import configparser
 12from rdflib import Graph, BNode, Namespace, Literal, RDF
 13from timeout_decorator import timeout, TimeoutError  # type: ignore
 14from typing import Optional
 15from bench_executor.container import Container
 16from bench_executor.logger import Logger
 17
 18VERSION = '4.6.6.5'
 19TIMEOUT = 6 * 3600  # 6 hours
 20R2RML = Namespace('http://www.w3.org/ns/r2rml#')
 21RML = Namespace('http://semweb.mmlab.be/ns/rml#')
 22D2RQ = Namespace('http://www.wiwiss.fu-berlin.de/suhl/bizer/D2RQ/0.1#')
 23IMAGE = f'blindreviewing/sdm-rdfizer:v{VERSION}'
 24
 25
 26class SDMRDFizer(Container):
 27    """SDMRDFizer container for executing RML mappings."""
 28
 29    def __init__(self, data_path: str, config_path: str, directory: str,
 30                 verbose: bool):
 31        """Creates an instance of the SDMRDFizer class.
 32
 33        Parameters
 34        ----------
 35        data_path : str
 36            Path to the data directory of the case.
 37        config_path : str
 38            Path to the config directory of the case.
 39        directory : str
 40            Path to the directory to store logs.
 41        verbose : bool
 42            Enable verbose logs.
 43        """
 44        self._data_path = os.path.abspath(data_path)
 45        self._config_path = os.path.abspath(config_path)
 46        self._logger = Logger(__name__, directory, verbose)
 47
 48        os.umask(0)
 49        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
 50        super().__init__(IMAGE, 'SDM-RDFizer', self._logger,
 51                         volumes=[f'{self._data_path}/sdmrdfizer:/data',
 52                                  f'{self._data_path}/shared:/data/shared'])
 53
 54    @property
 55    def root_mount_directory(self) -> str:
 56        """Subdirectory in the root directory of the case for SDM-RDFizer.
 57
 58        Returns
 59        -------
 60        subdirectory : str
 61            Subdirectory of the root directory for SDM-RDFizer.
 62
 63        """
 64        return __name__.lower()
 65
 66    @timeout(TIMEOUT)
 67    def _execute_with_timeout(self, arguments) -> bool:
 68        """Execute a mapping with a provided timeout.
 69
 70        Returns
 71        -------
 72        success : bool
 73            Whether the execution was successfull or not.
 74        """
 75        cmd = 'python3 sdm-rdfizer/rdfizer/run_rdfizer.py ' + \
 76              '/data/config_sdmrdfizer.ini'
 77        return self.run_and_wait_for_exit(cmd)
 78
 79    def execute(self, arguments: list) -> bool:
 80        """Execute SDM-RDFizer with given arguments.
 81
 82        Parameters
 83        ----------
 84        arguments : list
 85            Arguments to supply to SDM-RDFizer.
 86
 87        Returns
 88        -------
 89        success : bool
 90            Whether the execution succeeded or not.
 91        """
 92        try:
 93            return self._execute_with_timeout(arguments)
 94        except TimeoutError:
 95            msg = f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer'
 96            self._logger.error(msg)
 97
 98        return False
 99
100    def execute_mapping(self,
101                        mapping_file: str,
102                        output_file: str,
103                        serialization: str,
104                        rdb_username: Optional[str] = None,
105                        rdb_password: Optional[str] = None,
106                        rdb_host: Optional[str] = None,
107                        rdb_port: Optional[int] = None,
108                        rdb_name: Optional[str] = None,
109                        rdb_type: Optional[str] = None) -> bool:
110        """Execute a mapping file with SDM-RDFizer.
111
112        N-Quads and N-Triples are currently supported as serialization
113        format for RMLMapper.
114
115        Parameters
116        ----------
117        mapping_file : str
118            Path to the mapping file to execute.
119        output_file : str
120            Name of the output file to store the triples in.
121        serialization : str
122            Serialization format to use.
123        rdb_username : Optional[str]
124            Username for the database, required when a database is used as
125            source.
126        rdb_password : Optional[str]
127            Password for the database, required when a database is used as
128            source.
129        rdb_host : Optional[str]
130            Hostname for the database, required when a database is used as
131            source.
132        rdb_port : Optional[int]
133            Port for the database, required when a database is used as source.
134        rdb_name : Optional[str]
135            Database name for the database, required when a database is used as
136            source.
137        rdb_type : Optional[str]
138            Database type, required when a database is used as source.
139
140        Returns
141        -------
142        success : bool
143            Whether the execution was successfull or not.
144        """
145
146        # Configuration file
147        name = os.path.splitext(os.path.basename(output_file))[0]
148        config = configparser.ConfigParser(delimiters=':')
149        config['default'] = {
150            'main_directory': '/data/shared'
151        }
152        config['datasets'] = {
153            'number_of_datasets': str(1),
154            'output_folder': '/data/shared',
155            'all_in_one_file': 'yes',
156            'remove_duplicate': 'yes',
157            'enrichment': 'yes',
158            'name': name,
159            'ordered': 'no',
160            'large_file': 'false'
161        }
162        config['dataset1'] = {
163            'name': name,
164            'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
165        }
166
167        if serialization == 'ntriples':
168            config['datasets']['output_format'] = 'n-triples'
169        elif serialization == 'turtle':
170            config['datasets']['output_format'] = 'turtle'
171        else:
172            raise NotImplementedError('SDM-RDFizer does not support'
173                                      '"serialization" output format')
174
175        if rdb_username is not None and rdb_password is not None \
176                and rdb_host is not None and rdb_port is not None \
177                and rdb_name is not None and rdb_type is not None:
178            config['dataset1']['user'] = rdb_username
179            config['dataset1']['password'] = rdb_password
180            config['dataset1']['host'] = rdb_host
181            config['dataset1']['port'] = str(rdb_port)
182            config['dataset1']['db'] = rdb_name
183            config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
184            if rdb_type == 'MySQL':
185                config['datasets']['dbType'] = 'mysql'
186                driver = 'jdbc:mysql'
187            elif rdb_type == 'PostgreSQL':
188                config['datasets']['dbType'] = 'postgres'
189                driver = 'jdbc:postgresql'
190            else:
191                raise NotImplementedError('SDM-RDFizer does not support RDB '
192                                          f'"{rdb_type}"')
193            dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'
194
195            # Compatibility with R2RML mapping files
196            # Replace rr:logicalTable with rml:logicalSource + D2RQ description
197            # and rr:column with rml:reference
198            g = Graph()
199            g.bind('rr', R2RML)
200            g.bind('rml', RML)
201            g.bind('d2rq', D2RQ)
202            g.bind('rdf', RDF)
203            g.parse(os.path.join(self._data_path, 'shared',
204                                 os.path.basename(mapping_file)))
205
206            # rr:logicalTable --> rml:logicalSource
207            for triples_map_iri, p, o in g.triples((None, RDF.type,
208                                                    R2RML.TriplesMap)):
209                logical_source_iri = BNode()
210                d2rq_rdb_iri = BNode()
211                logical_table_iri = g.value(triples_map_iri,
212                                            R2RML.logicalTable)
213                table_name_literal = g.value(logical_table_iri,
214                                             R2RML.tableName)
215                g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
216                g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
217                g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
218                g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
219                g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
220                g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
221                g.add((logical_source_iri, R2RML.tableName,
222                       table_name_literal))
223                g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
224                g.add((logical_source_iri, RDF.type, RML.LogicalSource))
225                g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
226                g.remove((triples_map_iri, R2RML.logicalTable,
227                          logical_table_iri))
228                g.remove((logical_table_iri, R2RML.tableName,
229                          table_name_literal))
230                g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
231                g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))
232
233            # rr:column --> rml:reference
234            for s, p, o in g.triples((None, R2RML.column, None)):
235                g.add((s, RML.reference, o))
236                g.remove((s, p, o))
237
238            # SDM-RDFizer cannot handle rml:referenceFormulation when using
239            # RDBs, remove it for safety
240            # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
241            for s, p, o in g.triples((None, RML.referenceFormulation, None)):
242                g.remove((s, p, o))
243
244            destination = os.path.join(self._data_path, 'sdmrdfizer',
245                                       'mapping_converted.rml.ttl')
246            g.serialize(destination=destination, format='turtle')
247
248        os.umask(0)
249        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
250        path = os.path.join(self._data_path, 'sdmrdfizer',
251                            'config_sdmrdfizer.ini')
252        with open(path, 'w') as f:
253            config.write(f, space_around_delimiters=False)
254
255        return self.execute([])
class SDMRDFizer(bench_executor.container.Container):
 27class SDMRDFizer(Container):
 28    """SDMRDFizer container for executing RML mappings."""
 29
 30    def __init__(self, data_path: str, config_path: str, directory: str,
 31                 verbose: bool):
 32        """Creates an instance of the SDMRDFizer class.
 33
 34        Parameters
 35        ----------
 36        data_path : str
 37            Path to the data directory of the case.
 38        config_path : str
 39            Path to the config directory of the case.
 40        directory : str
 41            Path to the directory to store logs.
 42        verbose : bool
 43            Enable verbose logs.
 44        """
 45        self._data_path = os.path.abspath(data_path)
 46        self._config_path = os.path.abspath(config_path)
 47        self._logger = Logger(__name__, directory, verbose)
 48
 49        os.umask(0)
 50        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
 51        super().__init__(IMAGE, 'SDM-RDFizer', self._logger,
 52                         volumes=[f'{self._data_path}/sdmrdfizer:/data',
 53                                  f'{self._data_path}/shared:/data/shared'])
 54
 55    @property
 56    def root_mount_directory(self) -> str:
 57        """Subdirectory in the root directory of the case for SDM-RDFizer.
 58
 59        Returns
 60        -------
 61        subdirectory : str
 62            Subdirectory of the root directory for SDM-RDFizer.
 63
 64        """
 65        return __name__.lower()
 66
 67    @timeout(TIMEOUT)
 68    def _execute_with_timeout(self, arguments) -> bool:
 69        """Execute a mapping with a provided timeout.
 70
 71        Returns
 72        -------
 73        success : bool
 74            Whether the execution was successfull or not.
 75        """
 76        cmd = 'python3 sdm-rdfizer/rdfizer/run_rdfizer.py ' + \
 77              '/data/config_sdmrdfizer.ini'
 78        return self.run_and_wait_for_exit(cmd)
 79
 80    def execute(self, arguments: list) -> bool:
 81        """Execute SDM-RDFizer with given arguments.
 82
 83        Parameters
 84        ----------
 85        arguments : list
 86            Arguments to supply to SDM-RDFizer.
 87
 88        Returns
 89        -------
 90        success : bool
 91            Whether the execution succeeded or not.
 92        """
 93        try:
 94            return self._execute_with_timeout(arguments)
 95        except TimeoutError:
 96            msg = f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer'
 97            self._logger.error(msg)
 98
 99        return False
100
101    def execute_mapping(self,
102                        mapping_file: str,
103                        output_file: str,
104                        serialization: str,
105                        rdb_username: Optional[str] = None,
106                        rdb_password: Optional[str] = None,
107                        rdb_host: Optional[str] = None,
108                        rdb_port: Optional[int] = None,
109                        rdb_name: Optional[str] = None,
110                        rdb_type: Optional[str] = None) -> bool:
111        """Execute a mapping file with SDM-RDFizer.
112
113        N-Quads and N-Triples are currently supported as serialization
114        format for RMLMapper.
115
116        Parameters
117        ----------
118        mapping_file : str
119            Path to the mapping file to execute.
120        output_file : str
121            Name of the output file to store the triples in.
122        serialization : str
123            Serialization format to use.
124        rdb_username : Optional[str]
125            Username for the database, required when a database is used as
126            source.
127        rdb_password : Optional[str]
128            Password for the database, required when a database is used as
129            source.
130        rdb_host : Optional[str]
131            Hostname for the database, required when a database is used as
132            source.
133        rdb_port : Optional[int]
134            Port for the database, required when a database is used as source.
135        rdb_name : Optional[str]
136            Database name for the database, required when a database is used as
137            source.
138        rdb_type : Optional[str]
139            Database type, required when a database is used as source.
140
141        Returns
142        -------
143        success : bool
144            Whether the execution was successfull or not.
145        """
146
147        # Configuration file
148        name = os.path.splitext(os.path.basename(output_file))[0]
149        config = configparser.ConfigParser(delimiters=':')
150        config['default'] = {
151            'main_directory': '/data/shared'
152        }
153        config['datasets'] = {
154            'number_of_datasets': str(1),
155            'output_folder': '/data/shared',
156            'all_in_one_file': 'yes',
157            'remove_duplicate': 'yes',
158            'enrichment': 'yes',
159            'name': name,
160            'ordered': 'no',
161            'large_file': 'false'
162        }
163        config['dataset1'] = {
164            'name': name,
165            'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
166        }
167
168        if serialization == 'ntriples':
169            config['datasets']['output_format'] = 'n-triples'
170        elif serialization == 'turtle':
171            config['datasets']['output_format'] = 'turtle'
172        else:
173            raise NotImplementedError('SDM-RDFizer does not support'
174                                      '"serialization" output format')
175
176        if rdb_username is not None and rdb_password is not None \
177                and rdb_host is not None and rdb_port is not None \
178                and rdb_name is not None and rdb_type is not None:
179            config['dataset1']['user'] = rdb_username
180            config['dataset1']['password'] = rdb_password
181            config['dataset1']['host'] = rdb_host
182            config['dataset1']['port'] = str(rdb_port)
183            config['dataset1']['db'] = rdb_name
184            config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
185            if rdb_type == 'MySQL':
186                config['datasets']['dbType'] = 'mysql'
187                driver = 'jdbc:mysql'
188            elif rdb_type == 'PostgreSQL':
189                config['datasets']['dbType'] = 'postgres'
190                driver = 'jdbc:postgresql'
191            else:
192                raise NotImplementedError('SDM-RDFizer does not support RDB '
193                                          f'"{rdb_type}"')
194            dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'
195
196            # Compatibility with R2RML mapping files
197            # Replace rr:logicalTable with rml:logicalSource + D2RQ description
198            # and rr:column with rml:reference
199            g = Graph()
200            g.bind('rr', R2RML)
201            g.bind('rml', RML)
202            g.bind('d2rq', D2RQ)
203            g.bind('rdf', RDF)
204            g.parse(os.path.join(self._data_path, 'shared',
205                                 os.path.basename(mapping_file)))
206
207            # rr:logicalTable --> rml:logicalSource
208            for triples_map_iri, p, o in g.triples((None, RDF.type,
209                                                    R2RML.TriplesMap)):
210                logical_source_iri = BNode()
211                d2rq_rdb_iri = BNode()
212                logical_table_iri = g.value(triples_map_iri,
213                                            R2RML.logicalTable)
214                table_name_literal = g.value(logical_table_iri,
215                                             R2RML.tableName)
216                g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
217                g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
218                g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
219                g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
220                g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
221                g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
222                g.add((logical_source_iri, R2RML.tableName,
223                       table_name_literal))
224                g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
225                g.add((logical_source_iri, RDF.type, RML.LogicalSource))
226                g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
227                g.remove((triples_map_iri, R2RML.logicalTable,
228                          logical_table_iri))
229                g.remove((logical_table_iri, R2RML.tableName,
230                          table_name_literal))
231                g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
232                g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))
233
234            # rr:column --> rml:reference
235            for s, p, o in g.triples((None, R2RML.column, None)):
236                g.add((s, RML.reference, o))
237                g.remove((s, p, o))
238
239            # SDM-RDFizer cannot handle rml:referenceFormulation when using
240            # RDBs, remove it for safety
241            # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
242            for s, p, o in g.triples((None, RML.referenceFormulation, None)):
243                g.remove((s, p, o))
244
245            destination = os.path.join(self._data_path, 'sdmrdfizer',
246                                       'mapping_converted.rml.ttl')
247            g.serialize(destination=destination, format='turtle')
248
249        os.umask(0)
250        os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
251        path = os.path.join(self._data_path, 'sdmrdfizer',
252                            'config_sdmrdfizer.ini')
253        with open(path, 'w') as f:
254            config.write(f, space_around_delimiters=False)
255
256        return self.execute([])

SDMRDFizer container for executing RML mappings.

SDMRDFizer(data_path: str, config_path: str, directory: str, verbose: bool)
def __init__(self, data_path: str, config_path: str, directory: str,
             verbose: bool):
    """Creates an instance of the SDMRDFizer class.

    Parameters
    ----------
    data_path : str
        Path to the data directory of the case.
    config_path : str
        Path to the config directory of the case.
    directory : str
        Path to the directory to store logs.
    verbose : bool
        Enable verbose logs.
    """
    self._data_path = os.path.abspath(data_path)
    self._config_path = os.path.abspath(config_path)
    self._logger = Logger(__name__, directory, verbose)

    # Clear the process umask before creating the tool directory, which
    # is mounted into the container as /data below.
    os.umask(0)
    tool_directory = os.path.join(self._data_path, 'sdmrdfizer')
    os.makedirs(tool_directory, exist_ok=True)

    volumes = [f'{self._data_path}/sdmrdfizer:/data',
               f'{self._data_path}/shared:/data/shared']
    super().__init__(IMAGE, 'SDM-RDFizer', self._logger, volumes=volumes)

Creates an instance of the SDMRDFizer class.

Parameters
  • data_path (str): Path to the data directory of the case.
  • config_path (str): Path to the config directory of the case.
  • directory (str): Path to the directory to store logs.
  • verbose (bool): Enable verbose logs.
root_mount_directory: str

Subdirectory in the root directory of the case for SDM-RDFizer.

Returns
  • subdirectory (str): Subdirectory of the root directory for SDM-RDFizer.
def execute(self, arguments: list) -> bool:
def execute(self, arguments: list) -> bool:
    """Execute SDM-RDFizer with given arguments.

    Parameters
    ----------
    arguments : list
        Arguments to supply to SDM-RDFizer.

    Returns
    -------
    success : bool
        Whether the execution succeeded or not. A timeout is logged and
        reported as a failure.
    """
    try:
        success = self._execute_with_timeout(arguments)
    except TimeoutError:
        self._logger.error(f'Timeout ({TIMEOUT}s) reached for SDM-RDFizer')
        success = False

    return success

Execute SDM-RDFizer with given arguments.

Parameters
  • arguments (list): Arguments to supply to SDM-RDFizer.
Returns
  • success (bool): Whether the execution succeeded or not.
def execute_mapping( self, mapping_file: str, output_file: str, serialization: str, rdb_username: Optional[str] = None, rdb_password: Optional[str] = None, rdb_host: Optional[str] = None, rdb_port: Optional[int] = None, rdb_name: Optional[str] = None, rdb_type: Optional[str] = None) -> bool:
def execute_mapping(self,
                    mapping_file: str,
                    output_file: str,
                    serialization: str,
                    rdb_username: Optional[str] = None,
                    rdb_password: Optional[str] = None,
                    rdb_host: Optional[str] = None,
                    rdb_port: Optional[int] = None,
                    rdb_name: Optional[str] = None,
                    rdb_type: Optional[str] = None) -> bool:
    """Execute a mapping file with SDM-RDFizer.

    N-Triples and Turtle are currently supported as serialization
    formats for SDM-RDFizer.

    When all database parameters are supplied, the mapping is read from
    the shared directory, rewritten from R2RML to RML with a D2RQ
    database description and stored as ``mapping_converted.rml.ttl``
    before SDM-RDFizer is started.

    Parameters
    ----------
    mapping_file : str
        Path to the mapping file to execute.
    output_file : str
        Name of the output file to store the triples in.
    serialization : str
        Serialization format to use: 'ntriples' or 'turtle'.
    rdb_username : Optional[str]
        Username for the database, required when a database is used as
        source.
    rdb_password : Optional[str]
        Password for the database, required when a database is used as
        source.
    rdb_host : Optional[str]
        Hostname for the database, required when a database is used as
        source.
    rdb_port : Optional[int]
        Port for the database, required when a database is used as source.
    rdb_name : Optional[str]
        Database name for the database, required when a database is used as
        source.
    rdb_type : Optional[str]
        Database type, required when a database is used as source:
        'MySQL' or 'PostgreSQL'.

    Returns
    -------
    success : bool
        Whether the execution was successful or not.

    Raises
    ------
    NotImplementedError
        If the serialization format or the database type is not
        supported.
    """

    # Configuration file
    # Interpolation is disabled so values containing '%' (for example
    # passwords) are written verbatim instead of raising ValueError.
    name = os.path.splitext(os.path.basename(output_file))[0]
    config = configparser.ConfigParser(delimiters=':',
                                       interpolation=None)
    config['default'] = {
        'main_directory': '/data/shared'
    }
    config['datasets'] = {
        'number_of_datasets': str(1),
        'output_folder': '/data/shared',
        'all_in_one_file': 'yes',
        'remove_duplicate': 'yes',
        'enrichment': 'yes',
        'name': name,
        'ordered': 'no',
        'large_file': 'false'
    }
    config['dataset1'] = {
        'name': name,
        'mapping': f'/data/shared/{os.path.basename(mapping_file)}'
    }

    if serialization == 'ntriples':
        config['datasets']['output_format'] = 'n-triples'
    elif serialization == 'turtle':
        config['datasets']['output_format'] = 'turtle'
    else:
        raise NotImplementedError('SDM-RDFizer does not support '
                                  f'"{serialization}" output format')

    # The database branch is only taken when every parameter is given.
    rdb_parameters = (rdb_username, rdb_password, rdb_host, rdb_port,
                      rdb_name, rdb_type)
    if all(parameter is not None for parameter in rdb_parameters):
        config['dataset1']['user'] = rdb_username
        config['dataset1']['password'] = rdb_password
        config['dataset1']['host'] = rdb_host
        config['dataset1']['port'] = str(rdb_port)
        config['dataset1']['db'] = rdb_name
        config['dataset1']['mapping'] = '/data/mapping_converted.rml.ttl'
        if rdb_type == 'MySQL':
            config['datasets']['dbType'] = 'mysql'
            driver = 'jdbc:mysql'
        elif rdb_type == 'PostgreSQL':
            config['datasets']['dbType'] = 'postgres'
            driver = 'jdbc:postgresql'
        else:
            raise NotImplementedError('SDM-RDFizer does not support RDB '
                                      f'"{rdb_type}"')
        dsn = f'{driver}://{rdb_host}:{rdb_port}/{rdb_name}'

        # Compatibility with R2RML mapping files
        # Replace rr:logicalTable with rml:logicalSource + D2RQ description
        # and rr:column with rml:reference
        g = Graph()
        g.bind('rr', R2RML)
        g.bind('rml', RML)
        g.bind('d2rq', D2RQ)
        g.bind('rdf', RDF)
        g.parse(os.path.join(self._data_path, 'shared',
                             os.path.basename(mapping_file)))

        # rr:logicalTable --> rml:logicalSource
        # The matches are copied into a list first because the loop body
        # adds and removes triples of the graph being searched.
        triples_maps = list(g.triples((None, RDF.type, R2RML.TriplesMap)))
        for triples_map_iri, _, _ in triples_maps:
            logical_source_iri = BNode()
            d2rq_rdb_iri = BNode()
            logical_table_iri = g.value(triples_map_iri,
                                        R2RML.logicalTable)
            table_name_literal = g.value(logical_table_iri,
                                         R2RML.tableName)
            g.add((d2rq_rdb_iri, D2RQ.jdbcDSN, Literal(dsn)))
            g.add((d2rq_rdb_iri, D2RQ.jdbcDriver, Literal(driver)))
            g.add((d2rq_rdb_iri, D2RQ.username, Literal(rdb_username)))
            g.add((d2rq_rdb_iri, D2RQ.password, Literal(rdb_password)))
            g.add((d2rq_rdb_iri, RDF.type, D2RQ.Database))
            g.add((logical_source_iri, R2RML.sqlVersion, R2RML.SQL2008))
            g.add((logical_source_iri, R2RML.tableName,
                   table_name_literal))
            g.add((logical_source_iri, RML.source, d2rq_rdb_iri))
            g.add((logical_source_iri, RDF.type, RML.LogicalSource))
            g.add((triples_map_iri, RML.logicalSource, logical_source_iri))
            g.remove((triples_map_iri, R2RML.logicalTable,
                      logical_table_iri))
            g.remove((logical_table_iri, R2RML.tableName,
                      table_name_literal))
            g.remove((logical_table_iri, RDF.type, R2RML.LogicalTable))
            g.remove((logical_table_iri, R2RML.sqlVersion, R2RML.SQL2008))

        # rr:column --> rml:reference
        columns = list(g.triples((None, R2RML.column, None)))
        for subject, _, column in columns:
            g.add((subject, RML.reference, column))
            g.remove((subject, R2RML.column, column))

        # SDM-RDFizer cannot handle rml:referenceFormulation when using
        # RDBs, remove it for safety
        # https://github.com/SDM-TIB/SDM-RDFizer/issues/71
        formulations = list(g.triples((None, RML.referenceFormulation,
                                       None)))
        for triple in formulations:
            g.remove(triple)

        destination = os.path.join(self._data_path, 'sdmrdfizer',
                                   'mapping_converted.rml.ttl')
        g.serialize(destination=destination, format='turtle')

    os.umask(0)
    os.makedirs(os.path.join(self._data_path, 'sdmrdfizer'), exist_ok=True)
    path = os.path.join(self._data_path, 'sdmrdfizer',
                        'config_sdmrdfizer.ini')
    with open(path, 'w') as f:
        config.write(f, space_around_delimiters=False)

    return self.execute([])

Execute a mapping file with SDM-RDFizer.

N-Triples and Turtle are currently supported as serialization formats for SDM-RDFizer.

Parameters
  • mapping_file (str): Path to the mapping file to execute.
  • output_file (str): Name of the output file to store the triples in.
  • serialization (str): Serialization format to use.
  • rdb_username (Optional[str]): Username for the database, required when a database is used as source.
  • rdb_password (Optional[str]): Password for the database, required when a database is used as source.
  • rdb_host (Optional[str]): Hostname for the database, required when a database is used as source.
  • rdb_port (Optional[int]): Port for the database, required when a database is used as source.
  • rdb_name (Optional[str]): Database name for the database, required when a database is used as source.
  • rdb_type (Optional[str]): Database type, required when a database is used as source.
Returns
  • success (bool): Whether the execution was successful or not.