[alan-turing][m] - individual pages (#828)

2023-05-01 21:06:52 -03:00 · 2023-05-01 21:06:52 -03:00 · 026059184a
commit 026059184a
parent a041d69282
19 changed files with 6022 additions and 404 deletions
--- a/examples/alan-turing-portal/content/contributing.md
+++ b/examples/alan-turing-portal/content/contributing.md
@ -0,0 +1,22 @@
+---
+title: Contributing
+---
+
+We accept entries to our catalogue based on pull requests to the content folder. The dataset must be available for download to be included in the list. If you want to add an entry, follow these steps!
+
+Please send just one dataset addition/edit at a time - edit it in, then save. This will make everyone’s life easier (including yours!)
+
+- Go to the repo url file and click the "Add file" dropdown and then click on "Create new file".
+![](https://i.imgur.com/2PR0ZgL.png)
+   
+- On the following page, type `content/datasets/<name-of-the-file>.md` if you want to add an entry to the datasets catalog, or `content/keywords/<name-of-the-file>.md` if you want to add an entry to the lists of abusive keywords.
+![](https://i.imgur.com/rr3uSYu.png)
+   
+- Copy the contents of `templates/dataset.md` or `templates/list-of-keywords.md` respectively into the text area below, filling out the fields with the correct data format.
+![](https://i.imgur.com/x6JIjhz.png)
+   
+- Click on "Commit changes". In the popup, make sure you give some brief detail on the proposed change, and then click on "Propose changes".
+![](https://i.imgur.com/BxuxKEJ.png)
+   
+- Submit the pull request on the next page when prompted.
+
--- a/examples/alan-turing-portal/content/datasets/abusive-language-detection-on-arabic-social-media-al-jazeera.md
+++ b/examples/alan-turing-portal/content/datasets/abusive-language-detection-on-arabic-social-media-al-jazeera.md
@ -12,3 +12,5 @@ platform: ["AlJazeera"]
 medium: ["Text"]
 reference: "Mubarak, H., Darwish, K. and Magdy, W., 2017. Abusive Language Detection on Arabic Social Media. In: Proceedings of the First Workshop on Abusive Language Online. Vancouver, Canada: Association for Computational Linguistics, pp.52-56."
 ---
+
+SOMETHING TEST
--- a/examples/alan-turing-portal/content/datasets/detecting-abusive-albanian.md
+++ b/examples/alan-turing-portal/content/datasets/detecting-abusive-albanian.md
--- a/examples/alan-turing-portal/content/datasets/hate-speech-detection-in-the-bengali-language-a-dataset-and-its-baseline-evaluation.md
+++ b/examples/alan-turing-portal/content/datasets/hate-speech-detection-in-the-bengali-language-a-dataset-and-its-baseline-evaluation.md
@ -12,3 +12,4 @@ platform: ["Youtube", "Facebook"]
 medium: ["Text"]
 reference: "Romim, N., Ahmed, M., Talukder, H., & Islam, M. S. (2021). Hate speech detection in the bengali language: A dataset and its baseline evaluation. In Proceedings of International Joint Conference on Advances in Computational Intelligence (pp. 457-468). Springer, Singapore."
 ---
+
--- a/examples/alan-turing-portal/content/datasets/let-mi-an-arabic-levantine-twitter-dataset-for-mysogynistic-language.md
+++ b/examples/alan-turing-portal/content/datasets/let-mi-an-arabic-levantine-twitter-dataset-for-mysogynistic-language.md
--- a/examples/alan-turing-portal/content/index.md
+++ b/examples/alan-turing-portal/content/index.md
@ -1,3 +1,7 @@
+---
+title: Hate Speech Dataset Catalogue
+---
+
 This page catalogues datasets annotated for hate speech, online abuse, and offensive language. They may be useful for e.g. training a natural language processing system to detect this language.

 The list is maintained by Leon Derczynski, Bertie Vidgen, Hannah Rose Kirk, Pica Johansson, Yi-Ling Chung, Mads Guldborg Kjeldgaard Kongsbak, Laila Sprejer, and Philine Zeinert.
--- a/examples/alan-turing-portal/content/keywords/hurtlex.md
+++ b/examples/alan-turing-portal/content/keywords/hurtlex.md
@ -0,0 +1,10 @@
+---
+title: Hurtlex
+description: HurtLex is a lexicon of offensive, aggressive, and hateful words in over 50 languages. The words are divided into 17 categories, plus a macro-category indicating whether there is stereotype involved.
+data-link: https://github.com/valeriobasile/hurtlex
+reference: http://ceur-ws.org/Vol-2253/paper49.pdf, Proc. CLiC-it 2018
+---
+
+## Markdown TEST
+
+Some text
--- a/examples/alan-turing-portal/content/keywords/jiang-et-al.md
+++ b/examples/alan-turing-portal/content/keywords/jiang-et-al.md
@ -0,0 +1,5 @@
+---
+title: SexHateLex is a Chinese lexicon of hateful and sexist words.
+data-link: https://doi.org/10.5281/zenodo.4773875
+reference: http://ceur-ws.org/Vol-2253/paper49.pdf, Journal of OSNEM, Vol.27, 2022, 100182, ISSN 2468-6964.
+---
--- a/examples/alan-turing-portal/lib/markdown.js
+++ b/examples/alan-turing-portal/lib/markdown.js
@ -0,0 +1,105 @@
+import matter from "gray-matter";
+import mdxmermaid from "mdx-mermaid";
+import { h } from "hastscript";
+import remarkCallouts from "@flowershow/remark-callouts";
+import remarkEmbed from "@flowershow/remark-embed";
+import remarkGfm from "remark-gfm";
+import remarkMath from "remark-math";
+import remarkSmartypants from "remark-smartypants";
+import remarkToc from "remark-toc";
+import remarkWikiLink from "@flowershow/remark-wiki-link";
+import rehypeAutolinkHeadings from "rehype-autolink-headings";
+import rehypeKatex from "rehype-katex";
+import rehypeSlug from "rehype-slug";
+import rehypePrismPlus from "rehype-prism-plus";
+
+import { serialize } from "next-mdx-remote/serialize";
+
+/**
+ * Parse a markdown or MDX file to an MDX source form + front matter data.
+ *
+ * The front matter is split off with gray-matter; the remaining content is
+ * handed to next-mdx-remote's `serialize` together with the remark/rehype
+ * plugin lists below, and the front matter data is passed along as `scope`.
+ *
+ * @param {string} source - the contents of a markdown or mdx file
+ * @param {string} format - used to indicate to next-mdx-remote which format to use (md or mdx)
+ * @returns {Promise<{ mdxSource: object, frontMatter: object, excerpt: string }>}
+ *   the serialized MDX, the parsed front matter and the generated excerpt
+ */
+const parse = async function (source, format) {
+  const { content, data, excerpt } = matter(source, {
+    excerpt: (file) => {
+      // The excerpt is everything before the first blank line of the content
+      file.excerpt = file.content.split("\n\n")[0];
+    },
+  });
+
+  const mdxSource = await serialize(
+    { value: content, path: format },
+    {
+      // Optionally pass remark/rehype plugins
+      mdxOptions: {
+        remarkPlugins: [
+          remarkEmbed,
+          remarkGfm,
+          [remarkSmartypants, { quotes: false, dashes: "oldschool" }],
+          remarkMath,
+          remarkCallouts,
+          remarkWikiLink,
+          [
+            remarkToc,
+            {
+              heading: "Table of contents",
+              tight: true,
+            },
+          ],
+          [mdxmermaid, {}],
+        ],
+        rehypePlugins: [
+          rehypeSlug,
+          [
+            rehypeAutolinkHeadings,
+            {
+              properties: { className: 'heading-link' },
+              // Only h2-h6 get an anchor; the table-of-contents heading and
+              // headings marked "blockquote-heading" are left alone.
+              test(element) {
+                return (
+                  ["h2", "h3", "h4", "h5", "h6"].includes(element.tagName) &&
+                  element.properties?.id !== "table-of-contents" &&
+                  element.properties?.className !== "blockquote-heading"
+                );
+              },
+              // Icon rendered inside the generated heading link
+              content() {
+                return [
+                  h(
+                    "svg",
+                    {
+                      // The SVG namespace URI; the "//" after the scheme is required
+                      xmlns: "http://www.w3.org/2000/svg",
+                      fill: "#ab2b65",
+                      viewBox: "0 0 20 20",
+                      className: "w-5 h-5",
+                    },
+                    [
+                      h("path", {
+                        fillRule: "evenodd",
+                        clipRule: "evenodd",
+                        d: "M9.493 2.853a.75.75 0 00-1.486-.205L7.545 6H4.198a.75.75 0 000 1.5h3.14l-.69 5H3.302a.75.75 0 000 1.5h3.14l-.435 3.148a.75.75 0 001.486.205L7.955 14h2.986l-.434 3.148a.75.75 0 001.486.205L12.456 14h3.346a.75.75 0 000-1.5h-3.14l.69-5h3.346a.75.75 0 000-1.5h-3.14l.435-3.147a.75.75 0 00-1.486-.205L12.045 6H9.059l.434-3.147zM8.852 7.5l-.69 5h2.986l.69-5H8.852z",
+                      }),
+                    ]
+                  ),
+                ];
+              },
+            },
+          ],
+          [rehypeKatex, { output: "mathml" }],
+          [rehypePrismPlus, { ignoreMissing: true }],
+        ],
+        format,
+      },
+      scope: data,
+    }
+  );
+
+  return {
+    mdxSource,
+    frontMatter: data,
+    excerpt,
+  };
+};
+
+export default parse;
--- a/examples/alan-turing-portal/markdown.db
+++ b/examples/alan-turing-portal/markdown.db
--- a/examples/alan-turing-portal/next-env.d.ts
+++ b/examples/alan-turing-portal/next-env.d.ts
@ -0,0 +1,5 @@
+/// <reference types="next" />
+/// <reference types="next/image-types/global" />
+
+// NOTE: This file should not be edited
+// see https://nextjs.org/docs/basic-features/typescript for more information.
--- a/examples/alan-turing-portal/package-lock.json
+++ b/examples/alan-turing-portal/package-lock.json
--- a/examples/alan-turing-portal/package.json
+++ b/examples/alan-turing-portal/package.json
@ -26,17 +26,42 @@
    "feed": "^4.2.2",
    "flexsearch": "^0.7.31",
    "focus-visible": "^5.2.0",
-    "next": "13.3.0",
    "next-router-mock": "^0.9.3",
    "next-superjson-plugin": "^0.5.7",
    "postcss-focus-visible": "^6.0.4",
-    "react": "18.2.0",
-    "react-dom": "18.2.0",
    "react-hook-form": "^7.43.9",
    "react-markdown": "^8.0.7",
-    "remark-gfm": "^3.0.1",
    "superjson": "^1.12.3",
-    "tailwindcss": "^3.3.0"
+    "tailwindcss": "^3.3.0",
+    "@flowershow/core": "^0.4.10",
+    "@flowershow/remark-callouts": "^1.0.0",
+    "@flowershow/remark-embed": "^1.0.0",
+    "@flowershow/remark-wiki-link": "^1.1.2",
+    "@heroicons/react": "^2.0.17",
+    "@opentelemetry/api": "^1.4.0",
+    "@tanstack/react-table": "^8.8.5",
+    "@types/node": "18.16.0",
+    "@types/react": "18.2.0",
+    "@types/react-dom": "18.2.0",
+    "eslint": "8.39.0",
+    "eslint-config-next": "13.3.1",
+    "gray-matter": "^4.0.3",
+    "hastscript": "^7.2.0",
+    "mdx-mermaid": "2.0.0-rc7",
+    "next": "13.2.1",
+    "next-mdx-remote": "^4.4.1",
+    "papaparse": "^5.4.1",
+    "react": "18.2.0",
+    "react-dom": "18.2.0",
+    "react-vega": "^7.6.0",
+    "rehype-autolink-headings": "^6.1.1",
+    "rehype-katex": "^6.0.3",
+    "rehype-prism-plus": "^1.5.1",
+    "rehype-slug": "^5.1.0",
+    "remark-gfm": "^3.0.1",
+    "remark-math": "^5.1.1",
+    "remark-smartypants": "^2.0.0",
+    "remark-toc": "^8.0.1"
  },
  "devDependencies": {
    "eslint": "8.26.0",
--- a/examples/alan-turing-portal/pages/[...slug].jsx
+++ b/examples/alan-turing-portal/pages/[...slug].jsx
@ -0,0 +1,99 @@
+import { Container } from '../components/Container'
+import clientPromise from '../lib/mddb'
+import fs from 'fs'
+import { MDXRemote } from 'next-mdx-remote'
+import { serialize } from 'next-mdx-remote/serialize'
+import { Card } from '../components/Card'
+
+// Builds the props for a single content page: looks the file up in the
+// markdown database by its URL path and serializes it (front matter included).
+export const getStaticProps = async ({ params }) => {
+  const { slug } = params
+  const urlPath = slug ? slug.join('/') : ''
+
+  const mddb = await clientPromise
+  const { file_path: filePath } = await mddb.getFileByUrl(urlPath)
+
+  const rawMarkdown = fs.readFileSync(filePath, { encoding: 'utf-8' })
+  const mdxSource = await serialize(rawMarkdown, { parseFrontmatter: true })
+
+  return { props: { mdxSource } }
+}
+
+// Lists every md/mdx file known to the markdown database as a static path,
+// using the segments of its `url_path` as the catch-all `slug`.
+export async function getStaticPaths() {
+  const mddb = await clientPromise
+  const documents = await mddb.getFiles({ extensions: ['md', 'mdx'] })
+
+  return {
+    paths: documents.map(({ url_path }) => ({
+      params: { slug: url_path.split('/') },
+    })),
+    fallback: false,
+  }
+}
+
+// Returns true only for absolute http(s) URLs given as strings.
+// `new URL()` on its own also accepts any "scheme:rest" text (for example
+// "Binary: hate or not" or "javascript:...") and coerces non-strings such as
+// one-element arrays, so both cases are rejected explicitly.
+const isValidUrl = (urlString) => {
+  if (typeof urlString !== 'string') {
+    return false
+  }
+  try {
+    const { protocol } = new URL(urlString)
+    return protocol === 'http:' || protocol === 'https:'
+  } catch (e) {
+    // Not parseable as an absolute URL
+    return false
+  }
+}
+
+// Renders each [name, value] pair as a "Label: value" row. Values accepted by
+// `isValidUrl` are rendered as links; array values are joined with ", ".
+const Meta = ({keyValuePairs}) => {
+  // Turns a key into a label, e.g. "link-to-data" -> "Link to data"
+  const prettifyMetaValue = (value) => {
+    const spaced = value.replaceAll('-', ' ')
+    return spaced.charAt(0).toUpperCase() + spaced.slice(1)
+  }
+  return (
+    <>
+      {keyValuePairs.map(([name, value]) => {
+        // Rows produced by map() need a stable React key; this assumes the
+        // entry names are unique (e.g. they come from Object.entries).
+        return isValidUrl(value) ? (
+          <Card.Description key={name}>
+            <span className="font-semibold">
+              {prettifyMetaValue(name)}: {' '}
+            </span>
+              <a
+                className="text-ellipsis underline transition hover:text-teal-400 dark:hover:text-teal-900"
+                href={value}
+              >
+                {value}
+              </a>
+          </Card.Description>
+        ) : (
+          <Card.Description key={name}>
+            <span className="font-semibold">{prettifyMetaValue(name)}: </span>
+            {Array.isArray(value) ? value.join(', ') : value}
+          </Card.Description>
+        )
+      })}
+    </>
+  )
+}
+
+// Page component for a single content file: shows the front matter title,
+// the remaining front matter entries as a metadata card, and the MDX body.
+export default function DRDPage({ mdxSource }) {
+  // Everything except the title goes into the metadata card
+  const { title, ...otherFields } = mdxSource.frontmatter
+  const keyValuePairs = Object.entries(otherFields)
+
+  return (
+    <>
+      <Container className="mt-16 lg:mt-32">
+        <article>
+          <header className="flex flex-col">
+            <h1 className="mt-6 text-4xl font-bold tracking-tight text-zinc-800 dark:text-zinc-100 sm:text-5xl">
+              {title}
+            </h1>
+            <Card as="article">
+              <Meta keyValuePairs={keyValuePairs} />
+            </Card>
+          </header>
+          <div className="prose dark:prose-invert">
+            <MDXRemote {...mdxSource} />
+          </div>
+        </article>
+      </Container>
+    </>
+  )
+}
--- a/examples/alan-turing-portal/pages/index.jsx
+++ b/examples/alan-turing-portal/pages/index.jsx
@ -3,19 +3,22 @@ import fs from 'fs'

 import { Card } from '../components/Card'
 import { Container } from '../components/Container'
-import clientPromise from '@/lib/mddb'
+import clientPromise from '../lib/mddb'
 import ReactMarkdown from 'react-markdown'
 import { Index } from 'flexsearch'
 import { useForm } from 'react-hook-form'
+import Link from 'next/link'
+import { serialize } from 'next-mdx-remote/serialize'
+import { MDXRemote } from 'next-mdx-remote'

 function DatasetCard({ dataset }) {
  return (
    <Card as="article">
-      <Card.Title>{dataset.title}</Card.Title>
+      <Card.Title><Link href={dataset.url}>{dataset.title}</Link></Card.Title>
      <Card.Description>
        <span className="font-semibold">Link to publication: </span>{' '}
        <a
-          className="underline transition hover:text-teal-400 dark:hover:text-teal-900 text-ellipsis"
+          className="text-ellipsis underline transition hover:text-teal-400 dark:hover:text-teal-900"
          href={dataset['link-to-publication']}
        >
          {dataset['link-to-publication']}
@ -24,7 +27,7 @@ function DatasetCard({ dataset }) {
      <Card.Description>
        <span className="font-semibold">Link to data: </span>
        <a
-          className="underline transition hover:text-teal-600 dark:hover:text-teal-900 text-ellipsis"
+          className="text-ellipsis underline transition hover:text-teal-600 dark:hover:text-teal-900"
          href={dataset['link-to-data']}
        >
          {dataset['link-to-data']}
@ -69,14 +72,61 @@ function DatasetCard({ dataset }) {
    </Card>
  )
 }
-export default function Home({ datasets, indexText, availableLanguages, availablePlatforms }) {
+
+// Card for one keyword list: linked title, optional description, and the
+// data link and reference rendered as links.
+function ListOfAbusiveKeywordsCard({ list }) {
+  const { url, title, description, reference, 'data-link': dataLink } = list
+  const linkClassName =
+    'text-ellipsis underline transition hover:text-teal-600 dark:hover:text-teal-900'
+
+  return (
+    <Card as="article">
+      <Card.Title>
+        <Link href={url}>{title}</Link>
+      </Card.Title>
+      {description && (
+        <Card.Description>
+          <span className="font-semibold">List Description: </span>{' '}
+          {description}
+        </Card.Description>
+      )}
+      <Card.Description>
+        <span className="font-semibold">Data Link: </span>
+        <a className={linkClassName} href={dataLink}>
+          {dataLink}
+        </a>
+      </Card.Description>
+      <Card.Description>
+        <span className="font-semibold">Reference: </span>
+        <a className={linkClassName} href={reference}>
+          {reference}
+        </a>
+      </Card.Description>
+    </Card>
+  )
+}
+
+export default function Home({
+  datasets,
+  indexText,
+  listsOfKeywords,
+  contributingText,
+  availableLanguages,
+  availablePlatforms,
+}) {
  const index = new Index()
-  datasets.forEach((dataset) => index.add(dataset.id, `${dataset.title} ${dataset['task-description']} ${dataset['details-of-task']} ${dataset['reference']}`))
-  const { register, watch } = useForm({ defaultValues: {
-    searchTerm: '',
-    lang: '',
-    platform: ''
-  }})
+  datasets.forEach((dataset) =>
+    index.add(
+      dataset.id,
+      `${dataset.title} ${dataset['task-description']} ${dataset['details-of-task']} ${dataset['reference']}`
+    )
+  )
+  const { register, watch, handleSubmit, reset } = useForm({
+    defaultValues: {
+      searchTerm: '',
+      lang: '',
+      platform: '',
+    },
+  })
  return (
    <>
      <Head>
@ -89,49 +139,68 @@ export default function Home({ datasets, indexText, availableLanguages, availabl
      <Container className="mt-9">
        <div className="max-w-2xl">
          <h1 className="text-4xl font-bold tracking-tight text-zinc-800 dark:text-zinc-100 sm:text-5xl">
-            Hate Speech Dataset Catalogue
+            {indexText.frontmatter.title}
          </h1>
          <article className="mt-6 flex flex-col gap-y-2 text-base text-zinc-600 dark:text-zinc-400">
-            <ReactMarkdown>{indexText}</ReactMarkdown>
+            <MDXRemote {...indexText} />
          </article>
        </div>
      </Container>
      <Container className="mt-24 md:mt-28">
-        <div className="mx-auto grid max-w-xl grid-cols-1 gap-y-8 lg:max-w-none">
-          <form className="rounded-2xl border border-zinc-100 px-4 py-6 sm:p-6 dark:border-zinc-700/40">
+        <div className="mx-auto grid max-w-7xl grid-cols-1 gap-y-8 lg:max-w-none">
+          <h2 className="text-xl font-bold tracking-tight text-zinc-800 dark:text-zinc-100 sm:text-5xl">
+            Datasets
+          </h2>
+          <form onSubmit={handleSubmit(() => reset())} className="rounded-2xl border border-zinc-100 px-4 py-6 dark:border-zinc-700/40 sm:p-6">
            <p className="mt-2 text-lg font-semibold text-zinc-600 dark:text-zinc-100">
              Search for datasets
            </p>
-            <div className="mt-6 flex flex-col sm:flex-row gap-3">
+            <div className="mt-6 flex flex-col gap-3 sm:flex-row">
              <input
                placeholder="Search here"
                aria-label="Hate speech on Twitter"
-                required
                {...register('searchTerm')}
                className="min-w-0 flex-auto appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] shadow-md shadow-zinc-800/5 placeholder:text-zinc-600 focus:border-teal-500 focus:outline-none focus:ring-4 focus:ring-teal-500/10 dark:border-zinc-700 dark:bg-zinc-700/[0.15] dark:text-zinc-200 dark:placeholder:text-zinc-200 dark:focus:border-teal-400 dark:focus:ring-teal-400/10 sm:text-sm"
              />
              <select
                placeholder="Language"
                defaultValue=""
-                className="min-w-0 flex-auto text-zinc-600 appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] shadow-md shadow-zinc-800/5 placeholder:text-zinc-400 focus:border-teal-500 focus:outline-none focus:ring-4 focus:ring-teal-500/10 dark:border-zinc-700 dark:bg-zinc-700/[0.15] dark:text-zinc-200 dark:placeholder:text-zinc-500 dark:focus:border-teal-400 dark:focus:ring-teal-400/10 sm:text-sm"
+                className="min-w-0 flex-auto appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] text-zinc-600 shadow-md shadow-zinc-800/5 placeholder:text-zinc-400 focus:border-teal-500 focus:outline-none focus:ring-4 focus:ring-teal-500/10 dark:border-zinc-700 dark:bg-zinc-700/[0.15] dark:text-zinc-200 dark:placeholder:text-zinc-500 dark:focus:border-teal-400 dark:focus:ring-teal-400/10 sm:text-sm"
                {...register('lang')}
              >
-                <option value="" disabled hidden>Filter by language</option>
+                <option value="" disabled hidden>
+                  Filter by language
+                </option>
                {availableLanguages.map((lang) => (
-                  <option key={lang} className='dark:bg-white dark:text-black' value={lang}>{lang}</option>
+                  <option
+                    key={lang}
+                    className="dark:bg-white dark:text-black"
+                    value={lang}
+                  >
+                    {lang}
+                  </option>
                ))}
              </select>
              <select
                placeholder="Platform"
                defaultValue=""
-                className="min-w-0 flex-auto text-zinc-600 appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] shadow-md shadow-zinc-800/5 placeholder:text-zinc-400 focus:border-teal-500 focus:outline-none focus:ring-4 focus:ring-teal-500/10 dark:border-zinc-700 dark:bg-zinc-700/[0.15] dark:text-zinc-200 dark:placeholder:text-zinc-500 dark:focus:border-teal-400 dark:focus:ring-teal-400/10 sm:text-sm"
+                className="min-w-0 flex-auto appearance-none rounded-md border border-zinc-900/10 bg-white px-3 py-[calc(theme(spacing.2)-1px)] text-zinc-600 shadow-md shadow-zinc-800/5 placeholder:text-zinc-400 focus:border-teal-500 focus:outline-none focus:ring-4 focus:ring-teal-500/10 dark:border-zinc-700 dark:bg-zinc-700/[0.15] dark:text-zinc-200 dark:placeholder:text-zinc-500 dark:focus:border-teal-400 dark:focus:ring-teal-400/10 sm:text-sm"
                {...register('platform')}
              >
-                <option value="" disabled hidden>Filter by platform</option>
+                <option value="" disabled hidden>
+                  Filter by platform
+                </option>
                {availablePlatforms.map((platform) => (
-                  <option key={platform} className='dark:bg-white dark:text-black' value={platform}>{platform}</option>
+                  <option
+                    key={platform}
+                    className="dark:bg-white dark:text-black"
+                    value={platform}
+                  >
+                    {platform}
+                  </option>
                ))}
              </select>
+              <button type='submit' className='inline-flex items-center gap-2 justify-center rounded-md py-2 px-3 text-sm outline-offset-2 transition active:transition-none bg-zinc-800 font-semibold text-zinc-100 hover:bg-zinc-700 active:bg-zinc-800 active:text-zinc-100/70 dark:bg-zinc-700 dark:hover:bg-zinc-600 dark:active:bg-zinc-700 dark:active:text-zinc-100/70 flex-none'>Clear filters</button>
            </div>
          </form>
          <div className="flex flex-col gap-16">
@ -157,24 +226,70 @@ export default function Home({ datasets, indexText, availableLanguages, availabl
          </div>
        </div>
      </Container>
+      <Container className="mt-16">
+        <h2 className="text-xl font-bold tracking-tight text-zinc-800 dark:text-zinc-100 sm:text-5xl">
+          Lists of Abusive Keywords
+        </h2>
+        <div className="mt-3 flex flex-col gap-16">
+          {listsOfKeywords.map((list) => (
+            <ListOfAbusiveKeywordsCard key={list.title} list={list} />
+          ))}
+        </div>
+      </Container>
+      <Container className="mt-16">
+        <h2 className="text-xl font-bold tracking-tight text-zinc-800 dark:text-zinc-100 sm:text-5xl">
+          How to contribute
+        </h2>
+          <article className="mt-6 flex flex-col gap-y-8 text-base text-zinc-600 dark:text-zinc-400 contributing">
+            <MDXRemote {...contributingText} />
+        </article>
+        </Container>
    </>
  )
 }

 export async function getStaticProps() {
  const mddb = await clientPromise
-  const allPages = await mddb.getFiles({ extensions: ['md', 'mdx'] })
-  const datasets = allPages
-    .filter((page) => page.url_path !== '/')
-    .map((page) => ({ ...page.metadata, id: page._id }))
-  const index = allPages.filter((page) => page.url_path === '/')[0]
-  const source = fs.readFileSync(index.file_path, { encoding: 'utf-8' })
-  const availableLanguages = [... new Set(datasets.map((dataset) => dataset.language))]
-  const availablePlatforms = [... new Set(datasets.map((dataset) => dataset.platform).flat())]
+  const datasetPages = await mddb.getFiles({
+    folder: 'datasets',
+    extensions: ['md', 'mdx'],
+  })
+  const datasets = datasetPages.map((page) => ({
+    ...page.metadata,
+    id: page._id,
+    url: page.url_path,
+  }))
+  const listsOfKeywordsPages = await mddb.getFiles({
+    folder: 'keywords',
+    extensions: ['md', 'mdx'],
+  })
+  const listsOfKeywords = listsOfKeywordsPages.map((page) => ({
+    ...page.metadata,
+    id: page._id,
+    url: page.url_path,
+  }))
+
+  const index = await mddb.getFileByUrl('/')
+  const contributing = await mddb.getFileByUrl('contributing')
+  let indexSource = fs.readFileSync(index.file_path, { encoding: 'utf-8' })
+  let contributingSource = fs.readFileSync(contributing.file_path, {
+    encoding: 'utf-8',
+  })
+  contributingSource = await serialize(contributingSource, { parseFrontmatter: true })
+  indexSource = await serialize(indexSource, { parseFrontmatter: true })
+
+  const availableLanguages = [
+    ...new Set(datasets.map((dataset) => dataset.language)),
+  ]
+  const availablePlatforms = [
+    ...new Set(datasets.map((dataset) => dataset.platform).flat()),
+  ]
  return {
    props: {
-      indexText: source,
      datasets,
+      listsOfKeywords,
+      indexText: indexSource,
+      contributingText: contributingSource,
      availableLanguages,
      availablePlatforms,
    },
--- a/examples/alan-turing-portal/styles/tailwind.css
+++ b/examples/alan-turing-portal/styles/tailwind.css
@ -2,3 +2,7 @@
@import 'tailwindcss/components';
@import './prism.css';
@import 'tailwindcss/utilities';
+
+.contributing li {
+  margin-bottom: 1.75rem;
+}
--- a/examples/alan-turing-portal/templates/dataset.md
+++ b/examples/alan-turing-portal/templates/dataset.md
@ -0,0 +1,14 @@
+---
+title: string
+link-to-publication: url
+link-to-data: url
+task-description: string
+details-of-task: string
+size-of-dataset: number
+percentage-abusive: number
+language: string
+level-of-annotation: list eg: ["Posts", "Comments", ...]
+platform: list eg: ["Youtube", "Facebook", ...]
+medium: list eg: ["Text", "Emojis", "Images", ...]
+reference: string
+---
--- a/examples/alan-turing-portal/templates/list-of-keywords.md
+++ b/examples/alan-turing-portal/templates/list-of-keywords.md
@ -0,0 +1,5 @@
+---
+title: string
+data-link: url
+reference: string
+---
--- a/examples/alan-turing-portal/tsconfig.json
+++ b/examples/alan-turing-portal/tsconfig.json
@ -0,0 +1,28 @@
+{
+  "compilerOptions": {
+    "lib": [
+      "dom",
+      "dom.iterable",
+      "esnext"
+    ],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": false,
+    "forceConsistentCasingInFileNames": true,
+    "noEmit": true,
+    "incremental": true,
+    "esModuleInterop": true,
+    "moduleResolution": "node",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "preserve"
+  },
+  "include": [
+    "next-env.d.ts",
+    "**/*.ts",
+    "**/*.tsx"
+  ],
+  "exclude": [
+    "node_modules"
+  ]
+}