Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 64 additions & 52 deletions ckan/patches/ckanext-dcat/ckanext/dcat/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,36 +259,37 @@ def graph_from_dataset(self, dataset_dict):

dataset_ref1 = URIRef(dataset_uri(dataset_dict))

if 'm_lps' in dataset_dict.get('holder_identifier'):
holder_id = dataset_dict.get('holder_identifier') or ''
if 'm_lps' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://dati.lavoro.gov.it/")
if 'r_emiro' in dataset_dict.get('holder_identifier'):
if 'r_emiro' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://dati.emilia-romagna.it/")
dataset_ref1=dataset_ref1.replace("dati.comune.fe.it","https://dati.comune.fe.it")
if 'r_marche' in dataset_dict.get('holder_identifier'):
if 'm_lps' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://dati.regione.marche.it/")
if 'r_toscan' in dataset_dict.get('holder_identifier'):
if 'r_toscan' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://dati.toscana.it")
if 'r_basili' in dataset_dict.get('holder_identifier'):
if 'r_basili' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://dati.regione.basilicata.it/catalog/")
if 'r_lazio' in dataset_dict.get('holder_identifier'):
if 'r_lazio' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://dati.lazio.it/catalog/")
if 'aci' in dataset_dict.get('holder_identifier'):
if 'aci' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://lod.aci.it")
if 'c_l219' in dataset_dict.get('holder_identifier'):
if 'c_l219' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://aperto.comune.torino.it/")
if 'cr_campa' in dataset_dict.get('holder_identifier'):
if 'cr_campa' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://opendata-crc.di.unisa.it/")
if '00304260409' in dataset_dict.get('holder_identifier'):
if '00304260409' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://opendata.comune.rimini.it/")
if 'c_a345' in dataset_dict.get('holder_identifier'):
if 'c_a345' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://ckan.opendatalaquila.it")
if 'uds_ca' in dataset_dict.get('holder_identifier'):
if 'uds_ca' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://data.tdm-project.it")
if 'm_it' in dataset_dict.get('holder_identifier'):
if 'm_it' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://www.interno.gov.it/")
if '00514490010' in dataset_dict.get('holder_identifier'):
if '00514490010' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"http://aperto.comune.torino.it/")
if 'm_inf' in dataset_dict.get('holder_identifier'):
if 'm_inf' in holder_id:
dataset_ref1=dataset_ref1.replace(PREF_LANDING,"https://dati.mit.gov.it/")


Expand Down Expand Up @@ -406,14 +407,16 @@ def serialize_catalog(self, catalog_dict=None, dataset_dicts=None,
log.debug('catalog_ref in graph %s', catalog_ref)
cat_ref = self._add_source_catalog(catalog_ref, dataset_dict, dataset_ref)
if not cat_ref:
org_site = self.g.objects(
URIRef(str(catalog_ref) + "/organization/" + dataset_dict.get('owner_org')),
VCARD.hasURL
)
try:
self.g.add((next(org_site), DCAT.dataset, dataset_ref))
except StopIteration:
log.debug("No more elements in org_site")
owner_org = dataset_dict.get('owner_org') or ''
if owner_org:
org_site = self.g.objects(
URIRef(str(catalog_ref) + "/organization/" + owner_org),
VCARD.hasURL
)
try:
self.g.add((next(org_site), DCAT.dataset, dataset_ref))
except StopIteration:
log.debug("No more elements in org_site")
else:
self.g.add((cat_ref, DCAT.dataset, dataset_ref))

Expand Down Expand Up @@ -442,39 +445,45 @@ def _get_from_extra(key):

log.debug('source_uri pre patch %s',source_uri)
# Patch for harvesting: source URI override for hasPart Catalog
if 'r_marche' in dataset_dict.get('holder_identifier'):
holder_id = dataset_dict.get('holder_identifier') or ''
if 'm_lps' in holder_id:
source_uri='https://dati.regione.marche.it/'
elif 'r_emiro' in dataset_dict.get('holder_identifier'):
elif 'r_emiro' in holder_id:
source_uri='https://dati.emilia-romagna.it'
source_catalog_homepage=source_uri
elif 'r_toscan' in dataset_dict.get('holder_identifier'):
elif 'r_toscan' in holder_id:
source_uri='https://dati.toscana.it'
elif 'r_lazio' in dataset_dict.get('holder_identifier'):
elif 'r_lazio' in holder_id:
source_uri='http://dati.regione.lazio.it'
elif 'r_basili' in dataset_dict.get('holder_identifier'):
elif 'r_basili' in holder_id:
source_uri='https://dati.regione.basilicata.it'
elif 'aci' in dataset_dict.get('holder_identifier'):
elif 'aci' in holder_id:
source_uri='http://lod.aci.it/'
source_catalog_homepage=source_uri
elif 'm_lps' in dataset_dict.get('holder_identifier'):
elif 'm_lps' in holder_id:
source_uri='http://dati.lavoro.gov.it/'
elif 'c_l219' in dataset_dict.get('holder_identifier'):
elif 'c_l219' in holder_id:
source_uri='http://aperto.comune.torino.it/'
elif 'cr_campa' in dataset_dict.get('holder_identifier'):
elif 'cr_campa' in holder_id:
source_uri='http://opendata-crc.di.unisa.it/'
elif '00304260409' in dataset_dict.get('holder_identifier'):
elif '00304260409' in holder_id:
source_uri='https://opendata.comune.rimini.it/'
elif 'c_a345' in dataset_dict.get('holder_identifier'):
elif 'c_a345' in holder_id:
source_uri='https://ckan.opendatalaquila.it/'
elif 'uds_ca' in dataset_dict.get('holder_identifier'):
elif 'uds_ca' in holder_id:
source_uri='https://data.tdm-project.it'
elif 'm_it' in dataset_dict.get('holder_identifier'):
elif 'm_it' in holder_id:
source_uri='https://www.interno.gov.it/'
elif 'm_inf' in dataset_dict.get('holder_identifier'):
elif 'm_inf' in holder_id:
source_uri='https://dati.mit.gov.it/'
elif 'uni_ba' in dataset_dict.get('holder_identifier'):
elif 'uni_ba' in holder_id:
source_uri='http://opendata.uniba.it/'
elif 'opendata.maggioli.cloud' in dataset_dict.get('extras', []):
extras_list = dataset_dict.get('extras', [])
if isinstance(extras_list, list):
extras_str = str(extras_list)
else:
extras_str = ''
if 'opendata.maggioli.cloud' in extras_str:
source_uri='https://www.opendata.maggioli.cloud/'
log.debug('setto source_uri per Maggioli')

Expand Down Expand Up @@ -528,7 +537,7 @@ def _get_from_extra(key):
value='2024-01-01'
if key == 'source_catalog_homepage':
if not value:
if 'opendata.maggioli.cloud' in dataset_dict.get('extras', []):
if 'opendata.maggioli.cloud' in extras_str:
value='https://www.opendata.maggioli.cloud/organization/'+dataset_dict['organization']['name']+'#'
log.debug('setto homepage org Maggioli: %s',value)

Expand All @@ -540,12 +549,13 @@ def _get_from_extra(key):
value = value.replace('/#/#','')
if key == 'source_catalog_homepage' and not value.endswith("/#"):
value = value + '/'
if 'uni_ba' in dataset_dict.get('holder_identifier'):
holder_id_tmp = dataset_dict.get('holder_identifier') or ''
if 'uni_ba' in holder_id_tmp:
if key == 'source_catalog_homepage':
value = 'http://opendata.uniba.it/#'
if 'cciaan' in dataset_dict.get('holder_identifier'):
if 'cciaan' in holder_id_tmp:
value = 'https://opendata.marche.camcom.it'
if 'aci' in dataset_dict.get('holder_identifier'):
if 'aci' in holder_id_tmp:
dataset_dict['extras'].append({'key': 'source_catalog_modified', 'value': _get_from_extra('dcat_modified')})
dataset_dict['extras'].append({'key': 'source_catalog_language', 'value': 'ITA'})
if key == 'source_catalog_modified':
Expand Down Expand Up @@ -574,7 +584,7 @@ def _get_from_extra(key):
# Patch for Marche, because it has no metadata in extras for the source catalog.
if 'm_it' in identifier:
_pub= '{"uri": "", "name": "Ministero degli Interni", "email": "", "url": "https://www.interno.gov.it/", "type": "http://purl.org/adms/publishertype/NationalAuthority"}'
if 'r_marche' in identifier:
if 'm_lps' in identifier:
_pub= '{"uri": "", "name": "Regione Marche", "email": "", "url": "https://dati.regione.marche.it/", "type": "http://purl.org/adms/publishertype/RegionalAuthority"}'
if 'r_emiro' in identifier:
_pub= '{"uri": "", "name": "Regione Emilia-Romagna", "email": "", "url": "https://dati.emilia-romagna.it/", "type": "http://purl.org/adms/publishertype/RegionalAuthority"}'
Expand Down Expand Up @@ -624,21 +634,23 @@ def _get_from_extra(key):
for src_key, _type, predicate, required in publisher_sources:
val = pub.get(src_key)
if src_key == 'type':
if dataset_dict.get('holder_identifier'):
if 'r_' in dataset_dict.get('holder_identifier') or 'p_' in dataset_dict.get('holder_identifier') :
holder_id_auth = dataset_dict.get('holder_identifier') or ''
if holder_id_auth:
if 'r_' in holder_id_auth or 'p_' in holder_id_auth:
val="http://purl.org/adms/publishertype/RegionalAuthority"
if 'm_' in dataset_dict.get('holder_identifier'):
if 'm_' in holder_id_auth:
val="http://purl.org/adms/publishertype/NationalAuthority"
if 'c_' in dataset_dict.get('holder_identifier'):
if 'c_' in holder_id_auth:
val="http://purl.org/adms/publishertype/LocalAuthority"
if 'inail' in dataset_dict.get('holder_identifier') or 'inps' in dataset_dict.get('holder_identifier') or 'agid' in dataset_dict.get('holder_identifier'):
if 'inail' in holder_id_auth or 'inps' in holder_id_auth or 'agid' in holder_id_auth:
val="http://purl.org/adms/publishertype/NationalAuthority"
if 'anac' in dataset_dict.get('holder_identifier') or 'ispra' in dataset_dict.get('holder_identifier') or 'pcm' in dataset_dict.get('holder_identifier'):
if 'anac' in holder_id_auth or 'ispra' in holder_id_auth or 'pcm' in holder_id_auth:
val="http://purl.org/adms/publishertype/NationalAuthority"
if src_key == 'url':
homepage=_get_from_extra('source_catalog_homepage')
if dataset_dict.get('holder_identifier'):
if 'aci' in dataset_dict.get('holder_identifier'):
holder_id_url = dataset_dict.get('holder_identifier') or ''
if holder_id_url:
if 'aci' in holder_id_url:
homepage='http://lod.aci.it/'
if homepage is not None:
if homepage.endswith("/#"):
Expand Down