bench_executor.fuseki

Apache Jena Fuseki is a SPARQL server. It can run as an operating system service, as a Java web application (WAR file), and as a standalone server.

Website: https://jena.apache.org/documentation/fuseki2/

  1#!/usr/bin/env python3
  2
  3"""
  4Apache Jena Fuseki is a SPARQL server. It can run as an operating system
  5service, as a Java web application (WAR file), and as a standalone server.
  6
  7**Website**: https://jena.apache.org/documentation/fuseki2/
  8"""
  9
 10import os
 11import requests
 12import psutil
 13from typing import Dict
 14from bench_executor.container import Container
 15from bench_executor.logger import Logger
 16
 17VERSION = '4.6.1'
 18CMD_ARGS = '--tdb2 --update --loc /fuseki/databases/DB /ds'
 19
 20
 21class Fuseki(Container):
 22    """Fuseki container for executing SPARQL queries."""
 23    def __init__(self, data_path: str, config_path: str, directory: str,
 24                 verbose: bool):
 25        """Creates an instance of the Fuseki class.
 26
 27        Parameters
 28        ----------
 29        data_path : str
 30            Path to the data directory of the case.
 31        config_path : str
 32            Path to the config directory of the case.
 33        directory : str
 34            Path to the directory to store logs.
 35        verbose : bool
 36            Enable verbose logs.
 37        """
 38        self._data_path = os.path.abspath(data_path)
 39        self._config_path = os.path.abspath(config_path)
 40        self._logger = Logger(__name__, directory, verbose)
 41
 42        os.umask(0)
 43        os.makedirs(os.path.join(self._data_path, 'fuseki'), exist_ok=True)
 44
 45        # Set Java heap to 1/2 of available memory instead of the default 1/4
 46        max_heap = int(psutil.virtual_memory().total * (1/2))
 47
 48        super().__init__(f'blindreviewing/fuseki:v{VERSION}', 'Fuseki',
 49                         self._logger,
 50                         ports={'3030': '3030'},
 51                         environment={
 52                             'JAVA_OPTIONS': f'-Xmx{max_heap} -Xms{max_heap}'
 53                         },
 54                         volumes=[f'{self._config_path}/fuseki/'
 55                                  f'log4j2.properties:/fuseki/'
 56                                  f'log4j2.properties',
 57                                  f'{self._data_path}/shared:/data',
 58                                  f'{self._data_path}/fuseki:'
 59                                  '/fuseki/databases/DB'])
 60        self._endpoint = 'http://localhost:3030/ds/sparql'
 61
 62    def initialization(self) -> bool:
 63        """Initialize Fuseki's database.
 64
 65        Returns
 66        -------
 67        success : bool
 68            Whether the initialization was successfull or not.
 69        """
 70        # Fuseki should start with a initialized database, start Fuseki
 71        # if not initialized to avoid the pre-run start during benchmark
 72        # execution
 73        success = self.wait_until_ready()
 74        if not success:
 75            return False
 76        success = self.stop()
 77
 78        return success
 79
 80    @property
 81    def root_mount_directory(self) -> str:
 82        """Subdirectory in the root directory of the case for Fuseki.
 83
 84        Returns
 85        -------
 86        subdirectory : str
 87            Subdirectory of the root directory for Fuseki.
 88        """
 89        return __name__.lower()
 90
 91    @property
 92    def headers(self) -> Dict[str, Dict[str, str]]:
 93        """HTTP headers of SPARQL queries for serialization formats.
 94
 95        Only supported serialization formats are included in the dictionary.
 96        Currently, the following formats are supported:
 97        - N-Triples
 98        - Turtle
 99        - CSV
100        - RDF/JSON
101        - RDF/XML
102        - JSON-LD
103
104        Returns
105        -------
106        headers : dict
107            Dictionary of headers to use for each serialization format.
108        """
109        headers = {}
110        headers['ntriples'] = {'Accept': 'text/plain'}
111        headers['turtle'] = {'Accept': 'text/turtle'}
112        headers['csv'] = {'Accept': 'text/csv'}
113        headers['rdfjson'] = {'Accept': 'application/rdf+json'}
114        headers['rdfxml'] = {'Accept': 'application/rdf+xml'}
115        headers['jsonld'] = {'Accept': 'application/ld+json'}
116        return headers
117
118    def wait_until_ready(self, command: str = '') -> bool:
119        """Wait until Fuseki is ready to execute SPARQL queries.
120
121        Parameters
122        ----------
123        command : str
124            Command to execute in the Fuseki container, optionally, defaults to
125            no command.
126
127        Returns
128        -------
129        success : bool
130            Whether the Fuseki was initialized successfull or not.
131        """
132        command = f'{command} {CMD_ARGS}'
133        return self.run_and_wait_for_log(':: Start Fuseki ', command=command)
134
135    def load(self, rdf_file: str) -> bool:
136        """Load an RDF file into Fuseki.
137
138        Currently, only N-Triples files are supported.
139
140        Parameters
141        ----------
142        rdf_file : str
143            Name of the RDF file to load.
144
145        Returns
146        -------
147        success : bool
148            Whether the loading was successfull or not.
149        """
150        path = os.path.join(self._data_path, 'shared', rdf_file)
151
152        if not os.path.exists(path):
153            self._logger.error(f'RDF file "{rdf_file}" does not exist')
154            return False
155
156        # Load directory with data with HTTP post
157        try:
158            h = {'Content-Type': 'application/n-triples'}
159            r = requests.post('http://localhost:3030/ds',
160                              data=open(path, 'rb'),
161                              headers=h)
162            self._logger.debug(f'Loaded triples: {r.text}')
163            r.raise_for_status()
164        except Exception as e:
165            self._logger.error(f'Failed to load RDF: "{e}" into Fuseki')
166            return False
167
168        return True
169
170    def stop(self) -> bool:
171        """Stop Fuseki.
172
173        Drops all triples in Fuseki before stopping its container.
174
175        Returns
176        -------
177        success : bool
178            Whether stopping Fuseki was successfull or not.
179        """
180        # Drop triples on exit
181        try:
182            headers = {'Content-Type': 'application/sparql-update'}
183            data = 'DELETE { ?s ?p ?o . } WHERE { ?s ?p ?o . }'
184            r = requests.post('http://localhost:3030/ds/update',
185                              headers=headers, data=data)
186            self._logger.debug(f'Dropped triples: {r.text}')
187            r.raise_for_status()
188        except Exception as e:
189            self._logger.error(f'Failed to drop RDF: "{e}" from Fuseki')
190            return False
191
192        return super().stop()
193
194    @property
195    def endpoint(self):
196        """SPARQL endpoint URL"""
197        return self._endpoint
198
199
# Manual smoke test: start Fuseki, wait for the user and stop it again
if __name__ == '__main__':
    print(f'ℹ️  Starting up Fuseki v{VERSION}...')
    f = Fuseki('data', 'config', 'log', True)
    f.wait_until_ready()
    try:
        input('ℹ️  Press any key to stop')
    finally:
        # Stop Fuseki even when the prompt is interrupted (Ctrl-C, EOF)
        f.stop()
    print('ℹ️  Stopped')
class Fuseki(bench_executor.container.Container):
 22class Fuseki(Container):
 23    """Fuseki container for executing SPARQL queries."""
 24    def __init__(self, data_path: str, config_path: str, directory: str,
 25                 verbose: bool):
 26        """Creates an instance of the Fuseki class.
 27
 28        Parameters
 29        ----------
 30        data_path : str
 31            Path to the data directory of the case.
 32        config_path : str
 33            Path to the config directory of the case.
 34        directory : str
 35            Path to the directory to store logs.
 36        verbose : bool
 37            Enable verbose logs.
 38        """
 39        self._data_path = os.path.abspath(data_path)
 40        self._config_path = os.path.abspath(config_path)
 41        self._logger = Logger(__name__, directory, verbose)
 42
 43        os.umask(0)
 44        os.makedirs(os.path.join(self._data_path, 'fuseki'), exist_ok=True)
 45
 46        # Set Java heap to 1/2 of available memory instead of the default 1/4
 47        max_heap = int(psutil.virtual_memory().total * (1/2))
 48
 49        super().__init__(f'blindreviewing/fuseki:v{VERSION}', 'Fuseki',
 50                         self._logger,
 51                         ports={'3030': '3030'},
 52                         environment={
 53                             'JAVA_OPTIONS': f'-Xmx{max_heap} -Xms{max_heap}'
 54                         },
 55                         volumes=[f'{self._config_path}/fuseki/'
 56                                  f'log4j2.properties:/fuseki/'
 57                                  f'log4j2.properties',
 58                                  f'{self._data_path}/shared:/data',
 59                                  f'{self._data_path}/fuseki:'
 60                                  '/fuseki/databases/DB'])
 61        self._endpoint = 'http://localhost:3030/ds/sparql'
 62
 63    def initialization(self) -> bool:
 64        """Initialize Fuseki's database.
 65
 66        Returns
 67        -------
 68        success : bool
 69            Whether the initialization was successfull or not.
 70        """
 71        # Fuseki should start with a initialized database, start Fuseki
 72        # if not initialized to avoid the pre-run start during benchmark
 73        # execution
 74        success = self.wait_until_ready()
 75        if not success:
 76            return False
 77        success = self.stop()
 78
 79        return success
 80
 81    @property
 82    def root_mount_directory(self) -> str:
 83        """Subdirectory in the root directory of the case for Fuseki.
 84
 85        Returns
 86        -------
 87        subdirectory : str
 88            Subdirectory of the root directory for Fuseki.
 89        """
 90        return __name__.lower()
 91
 92    @property
 93    def headers(self) -> Dict[str, Dict[str, str]]:
 94        """HTTP headers of SPARQL queries for serialization formats.
 95
 96        Only supported serialization formats are included in the dictionary.
 97        Currently, the following formats are supported:
 98        - N-Triples
 99        - Turtle
100        - CSV
101        - RDF/JSON
102        - RDF/XML
103        - JSON-LD
104
105        Returns
106        -------
107        headers : dict
108            Dictionary of headers to use for each serialization format.
109        """
110        headers = {}
111        headers['ntriples'] = {'Accept': 'text/plain'}
112        headers['turtle'] = {'Accept': 'text/turtle'}
113        headers['csv'] = {'Accept': 'text/csv'}
114        headers['rdfjson'] = {'Accept': 'application/rdf+json'}
115        headers['rdfxml'] = {'Accept': 'application/rdf+xml'}
116        headers['jsonld'] = {'Accept': 'application/ld+json'}
117        return headers
118
119    def wait_until_ready(self, command: str = '') -> bool:
120        """Wait until Fuseki is ready to execute SPARQL queries.
121
122        Parameters
123        ----------
124        command : str
125            Command to execute in the Fuseki container, optionally, defaults to
126            no command.
127
128        Returns
129        -------
130        success : bool
131            Whether the Fuseki was initialized successfull or not.
132        """
133        command = f'{command} {CMD_ARGS}'
134        return self.run_and_wait_for_log(':: Start Fuseki ', command=command)
135
136    def load(self, rdf_file: str) -> bool:
137        """Load an RDF file into Fuseki.
138
139        Currently, only N-Triples files are supported.
140
141        Parameters
142        ----------
143        rdf_file : str
144            Name of the RDF file to load.
145
146        Returns
147        -------
148        success : bool
149            Whether the loading was successfull or not.
150        """
151        path = os.path.join(self._data_path, 'shared', rdf_file)
152
153        if not os.path.exists(path):
154            self._logger.error(f'RDF file "{rdf_file}" does not exist')
155            return False
156
157        # Load directory with data with HTTP post
158        try:
159            h = {'Content-Type': 'application/n-triples'}
160            r = requests.post('http://localhost:3030/ds',
161                              data=open(path, 'rb'),
162                              headers=h)
163            self._logger.debug(f'Loaded triples: {r.text}')
164            r.raise_for_status()
165        except Exception as e:
166            self._logger.error(f'Failed to load RDF: "{e}" into Fuseki')
167            return False
168
169        return True
170
171    def stop(self) -> bool:
172        """Stop Fuseki.
173
174        Drops all triples in Fuseki before stopping its container.
175
176        Returns
177        -------
178        success : bool
179            Whether stopping Fuseki was successfull or not.
180        """
181        # Drop triples on exit
182        try:
183            headers = {'Content-Type': 'application/sparql-update'}
184            data = 'DELETE { ?s ?p ?o . } WHERE { ?s ?p ?o . }'
185            r = requests.post('http://localhost:3030/ds/update',
186                              headers=headers, data=data)
187            self._logger.debug(f'Dropped triples: {r.text}')
188            r.raise_for_status()
189        except Exception as e:
190            self._logger.error(f'Failed to drop RDF: "{e}" from Fuseki')
191            return False
192
193        return super().stop()
194
195    @property
196    def endpoint(self):
197        """SPARQL endpoint URL"""
198        return self._endpoint

Fuseki container for executing SPARQL queries.

Fuseki(data_path: str, config_path: str, directory: str, verbose: bool)
24    def __init__(self, data_path: str, config_path: str, directory: str,
25                 verbose: bool):
26        """Creates an instance of the Fuseki class.
27
28        Parameters
29        ----------
30        data_path : str
31            Path to the data directory of the case.
32        config_path : str
33            Path to the config directory of the case.
34        directory : str
35            Path to the directory to store logs.
36        verbose : bool
37            Enable verbose logs.
38        """
39        self._data_path = os.path.abspath(data_path)
40        self._config_path = os.path.abspath(config_path)
41        self._logger = Logger(__name__, directory, verbose)
42
43        os.umask(0)
44        os.makedirs(os.path.join(self._data_path, 'fuseki'), exist_ok=True)
45
46        # Set Java heap to 1/2 of available memory instead of the default 1/4
47        max_heap = int(psutil.virtual_memory().total * (1/2))
48
49        super().__init__(f'blindreviewing/fuseki:v{VERSION}', 'Fuseki',
50                         self._logger,
51                         ports={'3030': '3030'},
52                         environment={
53                             'JAVA_OPTIONS': f'-Xmx{max_heap} -Xms{max_heap}'
54                         },
55                         volumes=[f'{self._config_path}/fuseki/'
56                                  f'log4j2.properties:/fuseki/'
57                                  f'log4j2.properties',
58                                  f'{self._data_path}/shared:/data',
59                                  f'{self._data_path}/fuseki:'
60                                  '/fuseki/databases/DB'])
61        self._endpoint = 'http://localhost:3030/ds/sparql'

Creates an instance of the Fuseki class.

Parameters
  • data_path (str): Path to the data directory of the case.
  • config_path (str): Path to the config directory of the case.
  • directory (str): Path to the directory to store logs.
  • verbose (bool): Enable verbose logs.
def initialization(self) -> bool:
63    def initialization(self) -> bool:
64        """Initialize Fuseki's database.
65
66        Returns
67        -------
68        success : bool
69            Whether the initialization was successfull or not.
70        """
71        # Fuseki should start with a initialized database, start Fuseki
72        # if not initialized to avoid the pre-run start during benchmark
73        # execution
74        success = self.wait_until_ready()
75        if not success:
76            return False
77        success = self.stop()
78
79        return success

Initialize Fuseki's database.

Returns
  • success (bool): Whether the initialization was successful or not.
root_mount_directory: str

Subdirectory in the root directory of the case for Fuseki.

Returns
  • subdirectory (str): Subdirectory of the root directory for Fuseki.
headers: Dict[str, Dict[str, str]]

HTTP headers of SPARQL queries for serialization formats.

Only supported serialization formats are included in the dictionary. Currently, the following formats are supported:

  • N-Triples
  • Turtle
  • CSV
  • RDF/JSON
  • RDF/XML
  • JSON-LD
Returns
  • headers (dict): Dictionary of headers to use for each serialization format.
def wait_until_ready(self, command: str = '') -> bool:
119    def wait_until_ready(self, command: str = '') -> bool:
120        """Wait until Fuseki is ready to execute SPARQL queries.
121
122        Parameters
123        ----------
124        command : str
125            Command to execute in the Fuseki container, optionally, defaults to
126            no command.
127
128        Returns
129        -------
130        success : bool
131            Whether the Fuseki was initialized successfull or not.
132        """
133        command = f'{command} {CMD_ARGS}'
134        return self.run_and_wait_for_log(':: Start Fuseki ', command=command)

Wait until Fuseki is ready to execute SPARQL queries.

Parameters
  • command (str): Command to execute in the Fuseki container. Optional; defaults to no command.
Returns
  • success (bool): Whether Fuseki was initialized successfully or not.
def load(self, rdf_file: str) -> bool:
136    def load(self, rdf_file: str) -> bool:
137        """Load an RDF file into Fuseki.
138
139        Currently, only N-Triples files are supported.
140
141        Parameters
142        ----------
143        rdf_file : str
144            Name of the RDF file to load.
145
146        Returns
147        -------
148        success : bool
149            Whether the loading was successfull or not.
150        """
151        path = os.path.join(self._data_path, 'shared', rdf_file)
152
153        if not os.path.exists(path):
154            self._logger.error(f'RDF file "{rdf_file}" does not exist')
155            return False
156
157        # Load directory with data with HTTP post
158        try:
159            h = {'Content-Type': 'application/n-triples'}
160            r = requests.post('http://localhost:3030/ds',
161                              data=open(path, 'rb'),
162                              headers=h)
163            self._logger.debug(f'Loaded triples: {r.text}')
164            r.raise_for_status()
165        except Exception as e:
166            self._logger.error(f'Failed to load RDF: "{e}" into Fuseki')
167            return False
168
169        return True

Load an RDF file into Fuseki.

Currently, only N-Triples files are supported.

Parameters
  • rdf_file (str): Name of the RDF file to load.
Returns
  • success (bool): Whether the loading was successful or not.
def stop(self) -> bool:
171    def stop(self) -> bool:
172        """Stop Fuseki.
173
174        Drops all triples in Fuseki before stopping its container.
175
176        Returns
177        -------
178        success : bool
179            Whether stopping Fuseki was successfull or not.
180        """
181        # Drop triples on exit
182        try:
183            headers = {'Content-Type': 'application/sparql-update'}
184            data = 'DELETE { ?s ?p ?o . } WHERE { ?s ?p ?o . }'
185            r = requests.post('http://localhost:3030/ds/update',
186                              headers=headers, data=data)
187            self._logger.debug(f'Dropped triples: {r.text}')
188            r.raise_for_status()
189        except Exception as e:
190            self._logger.error(f'Failed to drop RDF: "{e}" from Fuseki')
191            return False
192
193        return super().stop()

Stop Fuseki.

Drops all triples in Fuseki before stopping its container.

Returns
  • success (bool): Whether stopping Fuseki was successful or not.
endpoint

SPARQL endpoint URL