import Head from 'next/head'
import fs from 'fs'
import { Card } from '../components/Card'
import { Container } from '../components/Container'
import clientPromise from '../lib/mddb'
import { Index } from 'flexsearch'
import { useForm } from 'react-hook-form'
import Link from 'next/link'
import { serialize } from 'next-mdx-remote/serialize'
import { MDXRemote } from 'next-mdx-remote'
/**
 * Presents the details of a single dataset entry.
 *
 * Fields read from `dataset` (all accessed by their front-matter style keys):
 * `title`, `link-to-publication`, `link-to-data`, `task-description`,
 * `details-of-task`, `size-of-dataset`, `percentage-abusive` (rendered with a
 * trailing `%`), `language`, `level-of-annotation`, `platform`, `medium` and
 * `reference`.
 *
 * `level-of-annotation`, `platform` and `medium` have `.join(', ')` called on
 * them, so they must be arrays; a dataset missing any of them will throw
 * during render.
 *
 * NOTE(review): no JSX element tags are visible in this copy of the return
 * expression — confirm the surrounding markup against the original file.
 *
 * @param {object} props
 * @param {object} props.dataset - Dataset record carrying the keys listed above.
 */
function DatasetCard({ dataset }) {
return (
{dataset.title}
Link to publication: {' '}
{dataset['link-to-publication']}
Link to data:
{dataset['link-to-data']}
Task Description:
{dataset['task-description']}
Details of Task: {' '}
{dataset['details-of-task']}
Size of Dataset: {' '}
{dataset['size-of-dataset']}
Percentage Abusive:
{dataset['percentage-abusive']}%
Language:
{dataset['language']}
Level of Annotation:
{dataset['level-of-annotation'].join(', ')}
Platform:
{dataset['platform'].join(', ')}
Medium:
{dataset['medium'].join(', ')}
Reference:
{dataset['reference']}
)
}
/**
 * Presents a single list of abusive keywords.
 *
 * Shows `list.title`, then the "List Description" section only when
 * `list.description` is truthy, followed by `list['data-link']` and
 * `list.reference`.
 *
 * NOTE(review): no JSX element tags are visible in this copy of the return
 * expression — confirm the surrounding markup against the original file.
 *
 * @param {object} props
 * @param {object} props.list - Keyword-list record carrying the keys above.
 */
function ListOfAbusiveKeywordsCard({ list }) {
return (
{list.title}
{list.description && (
List Description: {' '}
{list.description}
)}
Data Link:
{list['data-link']}
Reference:
{list.reference}
)
}
/**
 * Catalogue landing page: shows the index page title, the list of datasets
 * narrowed by the current search/filter form values, and the keyword lists.
 *
 * Datasets are narrowed by three successive filters, each of which is a no-op
 * while its form value is empty:
 *   1. `searchTerm` — keep datasets whose `id` is among the full-text search
 *      hits for the term;
 *   2. `lang` — keep datasets whose `language` strictly equals the value;
 *   3. `platform` — keep datasets whose `platform` includes the value.
 *
 * NOTE(review): no JSX element tags are visible in this copy of the return
 * expression and the two `.map` callbacks have empty bodies here — confirm the
 * markup against the original file. In the visible code `register`,
 * `handleSubmit`, `reset`, `availableLanguages` and `availablePlatforms` are
 * not referenced; presumably the missing markup uses them.
 *
 * @param {object} props
 * @param {object[]} props.datasets - Dataset records (see `getStaticProps`).
 * @param {object} props.indexText - Serialized index page; `frontmatter.title` is shown.
 * @param {object[]} props.listsOfKeywords - Keyword-list records.
 * @param {Array} props.availableLanguages - Distinct dataset languages.
 * @param {Array} props.availablePlatforms - Distinct dataset platforms.
 */
export default function Home({
datasets,
indexText,
listsOfKeywords,
availableLanguages,
availablePlatforms,
}) {
// Full-text index keyed by dataset id over title, task description, task
// details and reference. It lives in the component body, so it is rebuilt
// each time the component function runs.
const index = new Index()
datasets.forEach((dataset) =>
index.add(
dataset.id,
`${dataset.title} ${dataset['task-description']} ${dataset['details-of-task']} ${dataset['reference']}`
)
)
// Form state for the search box and the two filters; empty string means
// "no filter" for each field.
const { register, watch, handleSubmit, reset } = useForm({
defaultValues: {
searchTerm: '',
lang: '',
platform: '',
},
})
return (
<>
Hate Speech Dataset Catalogue
{indexText.frontmatter.title}
{datasets
.filter((dataset) =>
watch().searchTerm && watch().searchTerm !== ''
? index.search(watch().searchTerm).includes(dataset.id)
: true
)
.filter((dataset) =>
watch().lang && watch().lang !== ''
? dataset.language === watch().lang
: true
)
.filter((dataset) =>
watch().platform && watch().platform !== ''
? dataset.platform.includes(watch().platform)
: true
)
.map((dataset) => (
))}
{listsOfKeywords.map((list) => (
))}
>
)
}
/**
 * Build-time data loader for the catalogue home page.
 *
 * Reads the dataset and keyword-list pages from the content database, loads
 * and serializes the index page (`/`) including its front matter, and derives
 * the distinct languages and platforms used to populate the filters.
 *
 * @returns {Promise<{props: {
 *   datasets: object[],
 *   listsOfKeywords: object[],
 *   indexText: object,
 *   availableLanguages: Array,
 *   availablePlatforms: Array,
 * }}>} Props for the `Home` page component.
 * @throws {Error} When the content database has no file for the `/` url.
 */
export async function getStaticProps() {
  const mddb = await clientPromise

  // Flatten a page record into the plain object handed to the page:
  // its metadata plus a stable id and url.
  const toEntry = (page) => ({
    ...page.metadata,
    id: page._id,
    url: page.url_path,
  })

  // Distinct values with null/undefined removed. A dataset that lacks the
  // key would otherwise put `undefined` into the props, which Next.js cannot
  // serialize and which fails the build.
  const distinctDefined = (values) => [
    ...new Set(values.filter((value) => value != null)),
  ]

  const datasetPages = await mddb.getFiles({
    folder: 'datasets',
    extensions: ['md', 'mdx'],
  })
  const datasets = datasetPages.map(toEntry)

  const listsOfKeywordsPages = await mddb.getFiles({
    folder: 'keywords',
    extensions: ['md', 'mdx'],
  })
  const listsOfKeywords = listsOfKeywordsPages.map(toEntry)

  const indexPage = await mddb.getFileByUrl('/')
  if (!indexPage) {
    // Fail with an actionable message instead of a TypeError on `.file_path`.
    throw new Error("getStaticProps: no content file found for url '/'")
  }
  const rawIndexSource = fs.readFileSync(indexPage.file_path, {
    encoding: 'utf-8',
  })
  const indexText = await serialize(rawIndexSource, { parseFrontmatter: true })

  const availableLanguages = distinctDefined(
    datasets.map((dataset) => dataset.language)
  )
  // `platform` is a list per dataset, so flatten one level before de-duplicating.
  const availablePlatforms = distinctDefined(
    datasets.flatMap((dataset) => dataset.platform)
  )

  return {
    props: {
      datasets,
      listsOfKeywords,
      indexText,
      availableLanguages,
      availablePlatforms,
    },
  }
}