datahub/utils/index.ts

510 lines
14 KiB
TypeScript

const { URL } = require('url')
const bytes = require('bytes')
const slugify = require('slugify')
const config = require('../config')
module.exports.ckanToDataPackage = function (descriptor) {
// Make a copy
const datapackage = JSON.parse(JSON.stringify(descriptor))
// Lowercase name
datapackage.name = datapackage.name.toLowerCase()
// Rename notes => description
if (datapackage.notes) {
datapackage.description = datapackage.notes
delete datapackage.notes
}
// Rename ckan_url => homepage
if (datapackage.ckan_url) {
datapackage.homepage = datapackage.ckan_url
delete datapackage.ckan_url
}
// Parse license
const license = {}
if (datapackage.license_id) {
license.type = datapackage.license_id
delete datapackage.license_id
}
if (datapackage.license_title) {
license.title = datapackage.license_title
delete datapackage.license_title
}
if (datapackage.license_url) {
license.url = datapackage.license_url
delete datapackage.license_url
}
if (Object.keys(license).length > 0) {
datapackage.license = license
}
// Parse author and sources
const source = {}
if (datapackage.author) {
source.name = datapackage.author
delete datapackage.author
}
if (datapackage.author_email) {
source.email = datapackage.author_email
delete datapackage.author_email
}
if (datapackage.url) {
source.web = datapackage.url
delete datapackage.url
}
if (Object.keys(source).length > 0) {
datapackage.sources = [source]
}
// Parse maintainer
const author = {}
if (datapackage.maintainer) {
author.name = datapackage.maintainer
delete datapackage.maintainer
}
if (datapackage.maintainer_email) {
author.email = datapackage.maintainer_email
delete datapackage.maintainer_email
}
if (Object.keys(author).length > 0) {
datapackage.author = author
}
// Parse tags
if (datapackage.tags) {
datapackage.keywords = []
datapackage.tags.forEach(tag => {
datapackage.keywords.push(tag.name)
})
delete datapackage.tags
}
// Parse extras
// TODO
// Resources
datapackage.resources = datapackage.resources.map(resource => {
if (resource.name) {
resource.title = resource.title || resource.name
resource.name = resource.name.toLowerCase().replace(/ /g, '_')
} else {
resource.name = resource.id
}
if (resource.url) {
resource.path = resource.url
delete resource.url
}
if (!resource.schema) {
// If 'fields' property exists use it as schema fields
if (resource.fields) {
if (typeof(resource.fields) === 'string') {
try {
resource.fields = JSON.parse(resource.fields)
} catch (e) {
console.log('Could not parse resource.fields')
}
}
resource.schema = {fields: resource.fields}
delete resource.fields
}
}
return resource
})
return datapackage
}
/*
At the moment, we're considering only following examples of CKAN view:
1. recline_view => Data Explorer with Table view, Chart Builder, Map Builder
and Query Builder.
2. geojson_view => Leaflet map
3. pdf_view => our PDF viewer
4. recline_grid_view => our Table viewer
5. recline_graph_view => our Simple graph
6. recline_map_view => our Leaflet map
7. image_view => not supported at the moment
8. text_view => not supported at the moment
9. webpage_view => not supported at the moment
*/
module.exports.ckanViewToDataPackageView = (ckanView) => {
const viewTypeToSpecType = {
recline_view: 'dataExplorer', // from datastore data
recline_grid_view: 'table',
recline_graph_view: 'simple',
recline_map_view: 'tabularmap',
geojson_view: 'map',
pdf_view: 'document',
image_view: 'web',
webpage_view: 'web'
}
const dataPackageView = JSON.parse(JSON.stringify(ckanView))
dataPackageView.specType = viewTypeToSpecType[ckanView.view_type]
|| dataPackageView.specType
|| 'unsupported'
if (dataPackageView.specType === 'dataExplorer') {
dataPackageView.spec = {
widgets: [
{specType: 'table'},
{specType: 'simple'},
{specType: 'tabularmap'}
]
}
} else if (dataPackageView.specType === 'simple') {
const graphTypeConvert = {
lines: 'line',
'lines-and-points': 'lines-and-points',
points: 'points',
bars: 'horizontal-bar',
columns: 'bar'
}
dataPackageView.spec = {
group: ckanView.group,
series: Array.isArray(ckanView.series) ? ckanView.series : [ckanView.series],
type: graphTypeConvert[ckanView.graph_type] || 'line'
}
} else if (dataPackageView.specType === 'tabularmap') {
if (ckanView.map_field_type === 'geojson') {
dataPackageView.spec = {
geomField: ckanView.geojson_field
}
} else {
dataPackageView.spec = {
lonField: ckanView.longitude_field,
latField: ckanView.latitude_field
}
}
}
return dataPackageView
}
/*
Takes single field descriptor from datastore data dictionary and coverts into
tableschema field descriptor.
*/
module.exports.dataStoreDataDictionaryToTableSchema = (dataDictionary) => {
const internalDataStoreFields = ['_id', '_full_text', '_count']
if (internalDataStoreFields.includes(dataDictionary.id)) {
return null
}
const dataDictionaryType2TableSchemaType = {
'text': 'string',
'int': 'integer',
'float': 'number',
'date': 'date',
'time': 'time',
'timestamp': 'datetime',
'bool': 'boolean',
'json': 'object'
}
const field = {
name: dataDictionary.id,
type: dataDictionaryType2TableSchemaType[dataDictionary.type] || 'any'
}
if (dataDictionary.info) {
const constraintsAttributes = ['required', 'unique', 'minLength', 'maxLength', 'minimum', 'maximum', 'pattern', 'enum']
field.constraints = {}
Object.keys(dataDictionary.info).forEach(key => {
if (constraintsAttributes.includes(key)) {
field.constraints[key] = dataDictionary.info[key]
} else {
field[key] = dataDictionary.info[key]
}
})
}
return field
}
module.exports.convertToStandardCollection = (descriptor) => {
const standard = {
name: '',
title: '',
summary: '',
image: '',
count: null
}
standard.name = descriptor.name
standard.title = descriptor.title || descriptor.display_name
standard.summary = descriptor.description || ''
standard.image = descriptor.image_display_url || descriptor.image_url
standard.count = descriptor.package_count || 0
standard.extras = descriptor.extras || []
standard.groups = descriptor.groups || []
return standard
}
module.exports.convertToCkanSearchQuery = (query) => {
const ckanQuery = {
q: '',
fq: '',
rows: '',
start: '',
sort: '',
'facet.field': ['organization', 'groups', 'tags', 'res_format', 'license_id'],
'facet.limit': 5,
'facet.mincount': 0
}
// Split by space but ignore spaces within double quotes:
if (query.q) {
query.q.match(/(?:[^\s"]+|"[^"]*")+/g).forEach(part => {
if (part.includes(':')) {
ckanQuery.fq += part + ' '
} else {
ckanQuery.q += part + ' '
}
})
ckanQuery.fq = ckanQuery.fq.trim()
ckanQuery.q = ckanQuery.q.trim()
}
if (query.fq) {
ckanQuery.fq = ckanQuery.fq ? ckanQuery.fq + ' ' + query.fq : query.fq
}
// standard 'size' => ckan 'rows'
ckanQuery.rows = query.size || ''
// standard 'from' => ckan 'start'
ckanQuery.start = query.from || ''
// standard 'sort' => ckan 'sort'
const sortQueries = []
if (query.sort && query.sort.constructor == Object) {
for (let [key, value] of Object.entries(query.sort)) {
sortQueries.push(`${key} ${value}`)
}
ckanQuery.sort = sortQueries.join(',')
} else if (query.sort && query.sort.constructor == String) {
ckanQuery.sort = query.sort.replace(':', ' ')
} else if (query.sort && query.sort.constructor == Array) {
query.sort.forEach(sort => {
sortQueries.push(sort.replace(':', ' '))
})
ckanQuery.sort = sortQueries.join(',')
}
// Facets
ckanQuery['facet.field'] = query['facet.field'] || ckanQuery['facet.field']
ckanQuery['facet.limit'] = query['facet.limit'] || ckanQuery['facet.limit']
ckanQuery['facet.mincount'] = query['facet.mincount'] || ckanQuery['facet.mincount']
ckanQuery['facet.field'] = query['facet.field'] || ckanQuery['facet.field']
// Remove attributes with empty string, null or undefined values
Object.keys(ckanQuery).forEach((key) => (!ckanQuery[key]) && delete ckanQuery[key])
return ckanQuery
}
module.exports.pagination = (c, m) => {
let current = c,
last = m,
delta = 2,
left = current - delta,
right = current + delta + 1,
range = [],
rangeWithDots = [],
l;
range.push(1)
for (let i = c - delta; i <= c + delta; i++) {
if (i >= left && i < right && i < m && i > 1) {
range.push(i);
}
}
range.push(m)
for (let i of range) {
if (l) {
if (i - l === 2) {
rangeWithDots.push(l + 1);
} else if (i - l !== 1) {
rangeWithDots.push('...');
}
}
rangeWithDots.push(i);
l = i;
}
return rangeWithDots;
}
module.exports.processMarkdown = require('markdown-it')({
html: true,
linkify: true,
typographer: true
})
/**
* Process data package attributes prior to display to users.
* Process markdown
* Convert bytes to human readable format
* etc.
**/
module.exports.processDataPackage = function (datapackage) {
const newDatapackage = JSON.parse(JSON.stringify(datapackage))
if (newDatapackage.description) {
newDatapackage.descriptionHtml = module.exports.processMarkdown
.render(newDatapackage.description)
}
if (newDatapackage.readme) {
newDatapackage.readmeHtml = module.exports.processMarkdown
.render(newDatapackage.readme)
}
newDatapackage.formats = newDatapackage.formats || []
// Per each resource:
newDatapackage.resources.forEach(resource => {
if (resource.description) {
resource.descriptionHtml = module.exports.processMarkdown
.render(resource.description)
}
// Normalize format (lowercase)
if (resource.format) {
resource.format = resource.format.toLowerCase()
newDatapackage.formats.push(resource.format)
}
// Convert bytes into human-readable format:
if (resource.size) {
resource.sizeFormatted = bytes(resource.size, {decimalPlaces: 0})
}
})
return newDatapackage
}
/**
* Create 'displayResources' property which has:
* resource: Object containing resource descriptor
* api: API URL for the resource if available, e.g., Datastore
* proxy: path via proxy for the resource if available
* cc_proxy: path via CKAN Classic proxy if available
* slug: slugified name of a resource
**/
module.exports.prepareResourcesForDisplay = function (datapackage) {
const newDatapackage = JSON.parse(JSON.stringify(datapackage))
newDatapackage.displayResources = []
newDatapackage.resources.forEach((resource, index) => {
const api = resource.datastore_active
? config.get('API_URL') + 'datastore_search?resource_id=' + resource.id + '&sort=_id asc'
: null
// Use proxy path if datastore/filestore proxies are given:
let proxy, cc_proxy
try {
const resourceUrl = new URL(resource.path)
if (resourceUrl.host === config.get('PROXY_DATASTORE') && resource.format !== 'pdf') {
proxy = '/proxy/datastore' + resourceUrl.pathname + resourceUrl.search
}
if (resourceUrl.host === config.get('PROXY_FILESTORE') && resource.format !== 'pdf') {
proxy = '/proxy/filestore' + resourceUrl.pathname + resourceUrl.search
}
// Store a CKAN Classic proxy path
// https://github.com/ckan/ckan/blob/master/ckanext/resourceproxy/plugin.py#L59
const apiUrlObject = new URL(config.get('API_URL'))
cc_proxy = apiUrlObject.origin + `/dataset/${datapackage.id}/resource/${resource.id}/proxy`
} catch (e) {
console.warn(e)
}
const displayResource = {
resource,
api, // URI for getting the resource via API, e.g., Datastore. Useful when you want to fetch only 100 rows or similar.
proxy, // alternative for path in case there is CORS issue
cc_proxy,
slug: slugify(resource.name) + '-' + index // Used for anchor links
}
newDatapackage.displayResources.push(displayResource)
})
return newDatapackage
}
/**
* Prepare 'views' property which is used by 'datapackage-views-js' library to
* render visualizations such as tables, graphs and maps.
**/
module.exports.prepareViews = function (datapackage) {
const newDatapackage = JSON.parse(JSON.stringify(datapackage))
newDatapackage.views = newDatapackage.views || []
newDatapackage.resources.forEach(resource => {
const resourceViews = resource.views && resource.views.map(view => {
view.resources = [resource.name]
return view
})
newDatapackage.views = newDatapackage.views.concat(resourceViews)
})
return newDatapackage
}
/**
* Create 'dataExplorers' property which is used by 'data-explorer' library to
* render data explorer widgets.
**/
module.exports.prepareDataExplorers = function (datapackage) {
const newDatapackage = JSON.parse(JSON.stringify(datapackage))
newDatapackage.displayResources.forEach((displayResource, idx) => {
newDatapackage.displayResources[idx].dataExplorers = []
displayResource.resource.views && displayResource.resource.views.forEach(view => {
const widgets = []
if (view.specType === 'dataExplorer') {
view.spec.widgets.forEach((widget, index) => {
const widgetNames = {
table: 'Table',
simple: 'Chart',
tabularmap: 'Map'
}
widget = {
name: widgetNames[widget.specType] || 'Widget-' + index,
active: index === 0 ? true : false,
datapackage: {
views: [
{
id: view.id,
specType: widget.specType
}
]
}
}
widgets.push(widget)
})
} else {
const widget = {
name: view.title || '',
active: true,
datapackage: {
views: [view]
}
}
widgets.push(widget)
}
displayResource.resource.api = displayResource.resource.api || displayResource.api
const dataExplorer = JSON.stringify({
widgets,
datapackage: {
resources: [displayResource.resource]
}
}).replace(/'/g, "&#x27;")
newDatapackage.displayResources[idx].dataExplorers.push(dataExplorer)
})
})
return newDatapackage
}