# -*- coding: utf-8 -*-
#
# This file is part of Invenio.
# Copyright (C) 2015-2018 CERN.
#
# Invenio is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
"""Utilities."""
from __future__ import absolute_import, print_function
import re
from functools import partial
from flask import current_app
from lxml import etree
from lxml.builder import E
from lxml.etree import Element
from werkzeug.utils import import_string
try:
from functools import lru_cache
except ImportError: # pragma: no cover
from functools32 import lru_cache
# Prefix -> namespace URI map; following lxml's ``nsmap`` convention, the
# ``None`` key denotes the default (un-prefixed) namespace.
# NOTE(review): the 'xml' entry maps to the literal string 'xml', whereas the
# reserved XML namespace URI is 'http://www.w3.org/XML/1998/namespace' --
# confirm against the callers of ``ns['xml']`` before changing it.
ns = {
    None: 'http://datacite.org/schema/kernel-4',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    'xml': 'xml',
}

# OAI-PMH "eprints" description container: namespace and the two parts of
# its ``xsi:schemaLocation`` attribute (namespace URI, then XSD URL).
NS_EPRINTS = {None: 'http://www.openarchives.org/OAI/2.0/eprints'}
EPRINTS_SCHEMA_LOCATION = 'http://www.openarchives.org/OAI/2.0/eprints'
EPRINTS_SCHEMA_LOCATION_XSD = \
    'http://www.openarchives.org/OAI/2.0/eprints.xsd'

# OAI-PMH "oai-identifier" description container.
NS_OAI_IDENTIFIER = {
    None: 'http://www.openarchives.org/OAI/2.0/oai-identifier',
}
OAI_IDENTIFIER_SCHEMA_LOCATION = \
    'http://www.openarchives.org/OAI/2.0/oai-identifier'
OAI_IDENTIFIER_SCHEMA_LOCATION_XSD = \
    'http://www.openarchives.org/OAI/2.0/oai-identifier.xsd'

# OAI-PMH "friends" description container. The namespace URI ends with a
# slash, but the schema document itself is published as ``friends.xsd``
# (not ``friends/.xsd``).
NS_FRIENDS = {None: 'http://www.openarchives.org/OAI/2.0/friends/'}
FRIENDS_SCHEMA_LOCATION = 'http://www.openarchives.org/OAI/2.0/friends/'
FRIENDS_SCHEMA_LOCATION_XSD = \
    'http://www.openarchives.org/OAI/2.0/friends.xsd'
@lru_cache(maxsize=100)
def serializer(metadata_prefix):
    """Return the serializer callable configured for a metadata prefix.

    The serializer is looked up under the ``serializer`` key of the entry
    for ``metadata_prefix`` in ``OAISERVER_METADATA_FORMATS``. It may be
    configured either as an import string, or as a tuple whose first item
    is the import string and whose second item is a dict of keyword
    arguments to pre-bind. Results are memoized per prefix by ``lru_cache``.

    :param metadata_prefix: One of the metadata identifiers configured in
        ``OAISERVER_METADATA_FORMATS``.
    :returns: The imported object, or a ``functools.partial`` wrapping it
        when keyword arguments were configured.
    """
    formats = current_app.config['OAISERVER_METADATA_FORMATS']
    spec = formats[metadata_prefix]['serializer']

    # Plain import string: nothing to pre-bind.
    if not isinstance(spec, tuple):
        return import_string(spec)

    # (import string, kwargs) pair: bind the kwargs onto the imported object.
    target = import_string(spec[0])
    return partial(target, **spec[1])
def dumps_etree(pid, record, **kwargs):
    """Dump MARC21 compatible record.

    The ``_source`` entry of ``record`` is converted with dojson's
    ``to_marc21`` model and the result is handed to dojson's own
    ``dumps_etree`` together with any extra keyword arguments.

    :param pid: The :class:`invenio_pidstore.models.PersistentIdentifier`
        instance (not used by this implementation).
    :param record: The record; it must provide a ``_source`` key.
    :param kwargs: Passed through unchanged to dojson's ``dumps_etree``.
    :returns: A LXML Element instance.
    """
    # dojson is imported at call time; its helper is aliased so that it is
    # not confused with this function of the same name.
    from dojson.contrib.to_marc21 import to_marc21 as marc21_model
    from dojson.contrib.to_marc21.utils import \
        dumps_etree as marc21_dumps_etree

    marc21_data = marc21_model.do(record['_source'])
    return marc21_dumps_etree(marc21_data, **kwargs)
def datetime_to_datestamp(dt, day_granularity=False):
    """Transform datetime to datestamp.

    Microseconds are discarded. Timezone-aware datetimes are first converted
    to UTC so that the trailing ``Z`` designator is correct and no numeric
    offset leaks into the result; naive datetimes are formatted as they are.

    :param dt: The datetime to convert.
    :param day_granularity: Set day granularity on datestamp.
    :returns: The datestamp, ``YYYY-MM-DDThh:mm:ssZ`` by default or
        ``YYYY-MM-DD`` when ``day_granularity`` is true.
    """
    # ``utcoffset()`` is ``None`` for naive datetimes. For aware ones, shift
    # the wall clock to UTC and drop the tzinfo so ``isoformat()`` does not
    # append an offset such as ``+02:00`` in front of the ``Z``.
    offset = dt.utcoffset()
    if offset is not None:
        dt = (dt - offset).replace(tzinfo=None)

    # ignore microseconds
    dt = dt.replace(microsecond=0)

    if day_granularity:
        return dt.date().isoformat()
    return dt.isoformat() + 'Z'
def eprints_description(metadataPolicy, dataPolicy,
                        submissionPolicy=None, content=None):
    """Generate the eprints element for the identify response.

    The eprints container is used by the e-print community to describe
    the content and policies of repositories.
    For the full specification and schema definition visit:
    http://www.openarchives.org/OAI/2.0/guidelines-eprints.htm

    Child sections are emitted in the order ``content``, ``metadataPolicy``,
    ``dataPolicy``, ``submissionPolicy``. Each section holds one child
    element per ``key: value`` pair of the corresponding mapping.

    :param metadataPolicy: Mapping for the ``metadataPolicy`` section
        (always emitted).
    :param dataPolicy: Mapping for the ``dataPolicy`` section
        (always emitted).
    :param submissionPolicy: Optional mapping for the ``submissionPolicy``
        section; the section is skipped when this is empty or ``None``.
    :param content: Optional mapping for the ``content`` section; the
        section is skipped when this is empty or ``None``.
    :returns: The pretty-printed element as serialized by
        ``etree.tostring``.
    """
    root = Element(etree.QName(NS_EPRINTS[None], 'eprints'),
                   nsmap=NS_EPRINTS)
    xsi_schema_location = etree.QName(ns['xsi'], 'schemaLocation')
    root.set(xsi_schema_location,
             '{0} {1}'.format(EPRINTS_SCHEMA_LOCATION,
                              EPRINTS_SCHEMA_LOCATION_XSD))

    # (tag, mapping, always emitted?) in output order.
    sections = (
        ('content', content, False),
        ('metadataPolicy', metadataPolicy, True),
        ('dataPolicy', dataPolicy, True),
        ('submissionPolicy', submissionPolicy, False),
    )
    for tag, mapping, mandatory in sections:
        # Optional sections are dropped when no (or an empty) mapping is
        # given; mandatory ones are emitted unconditionally.
        if not mandatory and not mapping:
            continue
        section = etree.Element(tag)
        for key, value in mapping.items():
            section.append(E(key, value))
        root.append(section)

    return etree.tostring(root, pretty_print=True)
def oai_identifier_description(scheme, repositoryIdentifier,
                               delimiter, sampleIdentifier):
    """Generate the oai-identifier element for the identify response.

    The OAI identifier format is intended to provide persistent resource
    identifiers for items in repositories that implement OAI-PMH.
    For the full specification and schema definition visit:
    http://www.openarchives.org/OAI/2.0/guidelines-oai-identifier.htm

    :param scheme: Text of the ``scheme`` child element.
    :param repositoryIdentifier: Text of the ``repositoryIdentifier`` child
        element.
    :param delimiter: Text of the ``delimiter`` child element.
    :param sampleIdentifier: Text of the ``sampleIdentifier`` child element.
    :returns: The pretty-printed element as serialized by
        ``etree.tostring``.
    """
    # The schema names the root element ``oai-identifier`` (with a hyphen);
    # an underscore spelling does not validate against oai-identifier.xsd.
    root = Element(etree.QName(NS_OAI_IDENTIFIER[None], 'oai-identifier'),
                   nsmap=NS_OAI_IDENTIFIER)
    root.set(etree.QName(ns['xsi'], 'schemaLocation'),
             '{0} {1}'.format(OAI_IDENTIFIER_SCHEMA_LOCATION,
                              OAI_IDENTIFIER_SCHEMA_LOCATION_XSD))

    # Children are appended in this fixed order.
    children = (
        ('scheme', scheme),
        ('repositoryIdentifier', repositoryIdentifier),
        ('delimiter', delimiter),
        ('sampleIdentifier', sampleIdentifier),
    )
    for tag, text in children:
        root.append(E(tag, text))

    return etree.tostring(root, pretty_print=True)
def friends_description(baseURLs):
    """Generate the friends element for the identify response.

    The friends container is recommended for use by repositories
    to list confederate repositories.
    For the schema definition visit:
    http://www.openarchives.org/OAI/2.0/guidelines-friends.htm

    :param baseURLs: Iterable of base URLs; one ``baseURL`` child element
        is emitted per item, in iteration order.
    :returns: The pretty-printed element as serialized by
        ``etree.tostring``.
    """
    root = Element(etree.QName(NS_FRIENDS[None], 'friends'),
                   nsmap=NS_FRIENDS)
    xsi_schema_location = etree.QName(ns['xsi'], 'schemaLocation')
    root.set(xsi_schema_location,
             '{0} {1}'.format(FRIENDS_SCHEMA_LOCATION,
                              FRIENDS_SCHEMA_LOCATION_XSD))

    for url in baseURLs:
        root.append(E('baseURL', url))

    return etree.tostring(root, pretty_print=True)
# Characters that may not appear in an XML 1.0 document: the C0 controls
# other than tab, line feed and carriage return, the surrogate range, and
# the two non-characters U+FFFE / U+FFFF.
_XML10_ILLEGAL_CHARS = re.compile(
    u'[\x00-\x08\x0B\x0C\x0E-\x1F\uD800-\uDFFF\uFFFE\uFFFF]')


def sanitize_unicode(value):
    """Remove characters incompatible with XML 1.0.

    Follows the W3C recommendation: https://www.w3.org/TR/REC-xml/#charsets

    Based on
    https://lsimons.wordpress.com/2011/03/17/stripping-illegal-characters-out-of-xml-in-python/

    :param value: The text to clean.
    :returns: ``value`` with every disallowed character deleted.
    """
    return _XML10_ILLEGAL_CHARS.sub('', value)