[components][m] - move catalog to @portaljs/components
This commit is contained in:
48
packages/components/package-lock.json
generated
48
packages/components/package-lock.json
generated
@@ -1,19 +1,22 @@
|
||||
{
|
||||
"name": "@portaljs/components",
|
||||
"version": "0.0.1",
|
||||
"version": "0.0.3",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@portaljs/components",
|
||||
"version": "0.0.1",
|
||||
"version": "0.0.3",
|
||||
"dependencies": {
|
||||
"@heroicons/react": "^2.0.17",
|
||||
"@tanstack/react-table": "^8.8.5",
|
||||
"@types/flexsearch": "^0.7.3",
|
||||
"flexsearch": "0.7.21",
|
||||
"next-mdx-remote": "^4.4.1",
|
||||
"papaparse": "^5.4.1",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-hook-form": "^7.43.9",
|
||||
"react-vega": "^7.6.0",
|
||||
"vega": "5.20.2",
|
||||
"vega-lite": "5.1.0"
|
||||
@@ -4466,6 +4469,11 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/flexsearch": {
|
||||
"version": "0.7.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/flexsearch/-/flexsearch-0.7.3.tgz",
|
||||
"integrity": "sha512-HXwADeHEP4exXkCIwy2n1+i0f1ilP1ETQOH5KDOugjkTFZPntWo0Gr8stZOaebkxsdx+k0X/K6obU/+it07ocg=="
|
||||
},
|
||||
"node_modules/@types/glob": {
|
||||
"version": "8.1.0",
|
||||
"dev": true,
|
||||
@@ -8008,6 +8016,11 @@
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/flexsearch": {
|
||||
"version": "0.7.21",
|
||||
"resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.21.tgz",
|
||||
"integrity": "sha512-W7cHV7Hrwjid6lWmy0IhsWDFQboWSng25U3VVywpHOTJnnAZNPScog67G+cVpeX9f7yDD21ih0WDrMMT+JoaYg=="
|
||||
},
|
||||
"node_modules/flow-parser": {
|
||||
"version": "0.205.0",
|
||||
"dev": true,
|
||||
@@ -11993,6 +12006,21 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/react-hook-form": {
|
||||
"version": "7.43.9",
|
||||
"resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.43.9.tgz",
|
||||
"integrity": "sha512-AUDN3Pz2NSeoxQ7Hs6OhQhDr6gtF9YRuutGDwPQqhSUAHJSgGl2VeY3qN19MG0SucpjgDiuMJ4iC5T5uB+eaNQ==",
|
||||
"engines": {
|
||||
"node": ">=12.22.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/react-hook-form"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^16.8.0 || ^17 || ^18"
|
||||
}
|
||||
},
|
||||
"node_modules/react-inspector": {
|
||||
"version": "6.0.1",
|
||||
"dev": true,
|
||||
@@ -18010,6 +18038,11 @@
|
||||
"version": "3.2.1",
|
||||
"dev": true
|
||||
},
|
||||
"@types/flexsearch": {
|
||||
"version": "0.7.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/flexsearch/-/flexsearch-0.7.3.tgz",
|
||||
"integrity": "sha512-HXwADeHEP4exXkCIwy2n1+i0f1ilP1ETQOH5KDOugjkTFZPntWo0Gr8stZOaebkxsdx+k0X/K6obU/+it07ocg=="
|
||||
},
|
||||
"@types/glob": {
|
||||
"version": "8.1.0",
|
||||
"dev": true,
|
||||
@@ -20245,6 +20278,11 @@
|
||||
"version": "3.2.7",
|
||||
"dev": true
|
||||
},
|
||||
"flexsearch": {
|
||||
"version": "0.7.21",
|
||||
"resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.21.tgz",
|
||||
"integrity": "sha512-W7cHV7Hrwjid6lWmy0IhsWDFQboWSng25U3VVywpHOTJnnAZNPScog67G+cVpeX9f7yDD21ih0WDrMMT+JoaYg=="
|
||||
},
|
||||
"flow-parser": {
|
||||
"version": "0.205.0",
|
||||
"dev": true
|
||||
@@ -22587,6 +22625,12 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"react-hook-form": {
|
||||
"version": "7.43.9",
|
||||
"resolved": "https://registry.npmjs.org/react-hook-form/-/react-hook-form-7.43.9.tgz",
|
||||
"integrity": "sha512-AUDN3Pz2NSeoxQ7Hs6OhQhDr6gtF9YRuutGDwPQqhSUAHJSgGl2VeY3qN19MG0SucpjgDiuMJ4iC5T5uB+eaNQ==",
|
||||
"requires": {}
|
||||
},
|
||||
"react-inspector": {
|
||||
"version": "6.0.1",
|
||||
"dev": true,
|
||||
|
||||
@@ -25,14 +25,17 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@heroicons/react": "^2.0.17",
|
||||
"@tanstack/react-table": "^8.8.5",
|
||||
"@types/flexsearch": "^0.7.3",
|
||||
"flexsearch": "0.7.21",
|
||||
"next-mdx-remote": "^4.4.1",
|
||||
"papaparse": "^5.4.1",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-hook-form": "^7.43.9",
|
||||
"react-vega": "^7.6.0",
|
||||
"vega": "5.20.2",
|
||||
"vega-lite": "5.1.0",
|
||||
"@tanstack/react-table": "^8.8.5"
|
||||
"vega-lite": "5.1.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@storybook/addon-essentials": "^7.0.7",
|
||||
|
||||
119
packages/components/src/components/Catalog.tsx
Normal file
119
packages/components/src/components/Catalog.tsx
Normal file
@@ -0,0 +1,119 @@
|
||||
import { Index } from 'flexsearch';
|
||||
import { useState } from 'react';
|
||||
import DebouncedInput from './DebouncedInput';
|
||||
import { useForm } from 'react-hook-form';
|
||||
|
||||
export function Catalog({
|
||||
datasets,
|
||||
facets,
|
||||
}: {
|
||||
datasets: any[];
|
||||
facets: string[];
|
||||
}) {
|
||||
const [indexFilter, setIndexFilter] = useState('');
|
||||
const index = new Index({ tokenize: 'full' });
|
||||
datasets.forEach((dataset) =>
|
||||
index.add(
|
||||
dataset._id,
|
||||
//This will join every metadata value + the url_path into one big string and index that
|
||||
Object.entries(dataset.metadata).reduce(
|
||||
(acc, curr) => acc + ' ' + curr[1].toString(),
|
||||
''
|
||||
) +
|
||||
' ' +
|
||||
dataset.url_path
|
||||
)
|
||||
);
|
||||
|
||||
const facetValues = facets
|
||||
? facets.reduce((acc, facet) => {
|
||||
const possibleValues = datasets.reduce((acc, curr) => {
|
||||
const facetValue = curr.metadata[facet];
|
||||
if (facetValue) {
|
||||
return Array.isArray(facetValue)
|
||||
? acc.concat(facetValue)
|
||||
: acc.concat([facetValue]);
|
||||
}
|
||||
return acc;
|
||||
}, []);
|
||||
acc[facet] = {
|
||||
possibleValues: [...new Set(possibleValues)],
|
||||
selectedValue: null,
|
||||
};
|
||||
return acc;
|
||||
}, {})
|
||||
: [];
|
||||
|
||||
const { register, watch } = useForm(facetValues);
|
||||
|
||||
const filteredDatasets = datasets
|
||||
// First filter by flex search
|
||||
.filter((dataset) =>
|
||||
indexFilter !== ''
|
||||
? index.search(indexFilter).includes(dataset._id)
|
||||
: true
|
||||
)
|
||||
//Then check if the selectedValue for the given facet is included in the dataset metadata
|
||||
.filter((dataset) => {
|
||||
//Avoids a server rendering breakage
|
||||
if (!watch() || Object.keys(watch()).length === 0) return true
|
||||
//This will filter only the key pairs of the metadata values that were selected as facets
|
||||
const datasetFacets = Object.entries(dataset.metadata).filter((entry) =>
|
||||
facets.includes(entry[0])
|
||||
);
|
||||
//Check if the value present is included in the selected value in the form
|
||||
return datasetFacets.every((elem) =>
|
||||
watch()[elem[0]].selectedValue
|
||||
? (elem[1] as string | string[]).includes(
|
||||
watch()[elem[0]].selectedValue
|
||||
)
|
||||
: true
|
||||
);
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
<DebouncedInput
|
||||
value={indexFilter ?? ''}
|
||||
onChange={(value) => setIndexFilter(String(value))}
|
||||
className="p-2 text-sm shadow border border-block mr-1"
|
||||
placeholder="Search all datasets..."
|
||||
/>
|
||||
{Object.entries(facetValues).map((elem) => (
|
||||
<select
|
||||
key={elem[0]}
|
||||
defaultValue=""
|
||||
className="p-2 ml-1 text-sm shadow border border-block"
|
||||
{...register(elem[0] + '.selectedValue')}
|
||||
>
|
||||
<option value="">
|
||||
Filter by {elem[0]}
|
||||
</option>
|
||||
{(elem[1] as { possibleValues: string[] }).possibleValues.map(
|
||||
(val) => (
|
||||
<option
|
||||
key={val}
|
||||
className="dark:bg-white dark:text-black"
|
||||
value={val}
|
||||
>
|
||||
{val}
|
||||
</option>
|
||||
)
|
||||
)}
|
||||
</select>
|
||||
))}
|
||||
<ul className='mb-5 pl-6 mt-5 list-disc'>
|
||||
{filteredDatasets.map((dataset) => (
|
||||
<li className='py-2' key={dataset._id}>
|
||||
<a className='font-medium underline' href={dataset.url_path}>
|
||||
{dataset.metadata.title
|
||||
? dataset.metadata.title
|
||||
: dataset.url_path}
|
||||
</a>
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export * from "./components/Table";
|
||||
export * from "./components/Catalog";
|
||||
export * from "./components/LineChart";
|
||||
export * from "./components/Vega";
|
||||
export * from "./components/VegaLite";
|
||||
export * from "./components/VegaLite";
|
||||
|
||||
226
packages/components/stories/Catalog.stories.ts
Normal file
226
packages/components/stories/Catalog.stories.ts
Normal file
@@ -0,0 +1,226 @@
|
||||
import type { Meta, StoryObj } from '@storybook/react';
|
||||
|
||||
import { Catalog } from '../src/components/Catalog';
|
||||
|
||||
// More on how to set up stories at: https://storybook.js.org/docs/react/writing-stories/introduction
|
||||
const meta: Meta = {
|
||||
title: 'Components/Catalog',
|
||||
component: Catalog,
|
||||
tags: ['autodocs'],
|
||||
argTypes: {
|
||||
datasets: {
|
||||
description:
|
||||
'Lists of datasets to be displayed in the list, will usually be automatically available',
|
||||
},
|
||||
facets: {
|
||||
description:
|
||||
'List of frontmatter fields that should be used as filters, needs to match exactly with the field name',
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
export default meta;
|
||||
|
||||
type Story = StoryObj<{ datasets: any; facets: string[] }>;
|
||||
|
||||
// More on writing stories with args: https://storybook.js.org/docs/react/writing-stories/args
|
||||
export const WithoutFacets: Story = {
|
||||
name: 'Catalog without facets',
|
||||
args: {
|
||||
datasets: [
|
||||
{
|
||||
_id: '07026b22d49916754df1dc8ffb9ccd1c31878aae',
|
||||
url_path: 'dataset-4',
|
||||
file_path: 'content/dataset-4/index.md',
|
||||
metadata: {
|
||||
title: 'Detecting Abusive Albanian',
|
||||
'link-to-publication': 'https://arxiv.org/abs/2107.13592',
|
||||
'link-to-data': 'https://doi.org/10.6084/m9.figshare.19333298.v1',
|
||||
'task-description':
|
||||
'Hierarchical (offensive/not; untargeted/targeted; person/group/other)',
|
||||
'details-of-task':
|
||||
'Detect and categorise abusive language in social media data',
|
||||
'size-of-dataset': 11874,
|
||||
'percentage-abusive': 13.2,
|
||||
language: 'Albanian',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['Instagram', 'Youtube'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Nurce, E., Keci, J., Derczynski, L., 2021. Detecting Abusive Albanian. arXiv:2107.13592',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '42c86cf3c4fbbab11d91c2a7d6dcb8f750bc4e19',
|
||||
url_path: 'dataset-1',
|
||||
file_path: 'content/dataset-1/index.md',
|
||||
metadata: {
|
||||
title: 'AbuseEval v1.0',
|
||||
'link-to-publication':
|
||||
'http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.760.pdf',
|
||||
'link-to-data': 'https://github.com/tommasoc80/AbuseEval',
|
||||
'task-description':
|
||||
'Explicitness annotation of offensive and abusive content',
|
||||
'details-of-task':
|
||||
'Enriched versions of the OffensEval/OLID dataset with the distinction of explicit/implicit offensive messages and the new dimension for abusive messages. Labels for offensive language: EXPLICIT, IMPLICT, NOT; Labels for abusive language: EXPLICIT, IMPLICT, NOTABU',
|
||||
'size-of-dataset': 14100,
|
||||
'percentage-abusive': 20.75,
|
||||
language: 'English',
|
||||
'level-of-annotation': ['Tweets'],
|
||||
platform: ['Twitter'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Caselli, T., Basile, V., Jelena, M., Inga, K., and Michael, G. 2020. "I feel offended, don’t be abusive! implicit/explicit messages in offensive and abusive language". The 12th Language Resources and Evaluation Conference (pp. 6193-6202). European Language Resources Association.',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '80001dd32a752421fdcc64e91fbd237dc31d6bb3',
|
||||
url_path: 'dataset-2',
|
||||
file_path: 'content/dataset-2/index.md',
|
||||
metadata: {
|
||||
title:
|
||||
'Abusive Language Detection on Arabic Social Media (Al Jazeera)',
|
||||
'link-to-publication': 'https://www.aclweb.org/anthology/W17-3008',
|
||||
'link-to-data':
|
||||
'http://alt.qcri.org/~hmubarak/offensive/AJCommentsClassification-CF.xlsx',
|
||||
'task-description':
|
||||
'Ternary (Obscene, Offensive but not obscene, Clean)',
|
||||
'details-of-task': 'Incivility',
|
||||
'size-of-dataset': 32000,
|
||||
'percentage-abusive': 0.81,
|
||||
language: 'Arabic',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['AlJazeera'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Mubarak, H., Darwish, K. and Magdy, W., 2017. Abusive Language Detection on Arabic Social Media. In: Proceedings of the First Workshop on Abusive Language Online. Vancouver, Canada: Association for Computational Linguistics, pp.52-56.',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '96649d05d8193f4333b10015af76c6562971bd8c',
|
||||
url_path: 'dataset-3',
|
||||
file_path: 'content/dataset-3/index.md',
|
||||
metadata: {
|
||||
title: 'CoRAL: a Context-aware Croatian Abusive Language Dataset',
|
||||
'link-to-publication':
|
||||
'https://aclanthology.org/2022.findings-aacl.21/',
|
||||
'link-to-data':
|
||||
'https://github.com/shekharRavi/CoRAL-dataset-Findings-of-the-ACL-AACL-IJCNLP-2022',
|
||||
'task-description':
|
||||
'Multi-class based on context dependency categories (CDC)',
|
||||
'details-of-task': 'Detectioning CDC from abusive comments',
|
||||
'size-of-dataset': 2240,
|
||||
'percentage-abusive': 100,
|
||||
language: 'Croatian',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['Posts'],
|
||||
medium: ['Newspaper Comments'],
|
||||
reference:
|
||||
'Ravi Shekhar, Mladen Karan and Matthew Purver (2022). CoRAL: a Context-aware Croatian Abusive Language Dataset. Findings of the ACL: AACL-IJCNLP.',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
;
|
||||
|
||||
export const WithFacets: Story = {
|
||||
name: 'Catalog with facets',
|
||||
args: {
|
||||
datasets: [
|
||||
{
|
||||
_id: '07026b22d49916754df1dc8ffb9ccd1c31878aae',
|
||||
url_path: 'dataset-4',
|
||||
file_path: 'content/dataset-4/index.md',
|
||||
metadata: {
|
||||
title: 'Detecting Abusive Albanian',
|
||||
'link-to-publication': 'https://arxiv.org/abs/2107.13592',
|
||||
'link-to-data': 'https://doi.org/10.6084/m9.figshare.19333298.v1',
|
||||
'task-description':
|
||||
'Hierarchical (offensive/not; untargeted/targeted; person/group/other)',
|
||||
'details-of-task':
|
||||
'Detect and categorise abusive language in social media data',
|
||||
'size-of-dataset': 11874,
|
||||
'percentage-abusive': 13.2,
|
||||
language: 'Albanian',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['Instagram', 'Youtube'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Nurce, E., Keci, J., Derczynski, L., 2021. Detecting Abusive Albanian. arXiv:2107.13592',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '42c86cf3c4fbbab11d91c2a7d6dcb8f750bc4e19',
|
||||
url_path: 'dataset-1',
|
||||
file_path: 'content/dataset-1/index.md',
|
||||
metadata: {
|
||||
title: 'AbuseEval v1.0',
|
||||
'link-to-publication':
|
||||
'http://www.lrec-conf.org/proceedings/lrec2020/pdf/2020.lrec-1.760.pdf',
|
||||
'link-to-data': 'https://github.com/tommasoc80/AbuseEval',
|
||||
'task-description':
|
||||
'Explicitness annotation of offensive and abusive content',
|
||||
'details-of-task':
|
||||
'Enriched versions of the OffensEval/OLID dataset with the distinction of explicit/implicit offensive messages and the new dimension for abusive messages. Labels for offensive language: EXPLICIT, IMPLICT, NOT; Labels for abusive language: EXPLICIT, IMPLICT, NOTABU',
|
||||
'size-of-dataset': 14100,
|
||||
'percentage-abusive': 20.75,
|
||||
language: 'English',
|
||||
'level-of-annotation': ['Tweets'],
|
||||
platform: ['Twitter'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Caselli, T., Basile, V., Jelena, M., Inga, K., and Michael, G. 2020. "I feel offended, don’t be abusive! implicit/explicit messages in offensive and abusive language". The 12th Language Resources and Evaluation Conference (pp. 6193-6202). European Language Resources Association.',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '80001dd32a752421fdcc64e91fbd237dc31d6bb3',
|
||||
url_path: 'dataset-2',
|
||||
file_path: 'content/dataset-2/index.md',
|
||||
metadata: {
|
||||
title:
|
||||
'Abusive Language Detection on Arabic Social Media (Al Jazeera)',
|
||||
'link-to-publication': 'https://www.aclweb.org/anthology/W17-3008',
|
||||
'link-to-data':
|
||||
'http://alt.qcri.org/~hmubarak/offensive/AJCommentsClassification-CF.xlsx',
|
||||
'task-description':
|
||||
'Ternary (Obscene, Offensive but not obscene, Clean)',
|
||||
'details-of-task': 'Incivility',
|
||||
'size-of-dataset': 32000,
|
||||
'percentage-abusive': 0.81,
|
||||
language: 'Arabic',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['AlJazeera'],
|
||||
medium: ['Text'],
|
||||
reference:
|
||||
'Mubarak, H., Darwish, K. and Magdy, W., 2017. Abusive Language Detection on Arabic Social Media. In: Proceedings of the First Workshop on Abusive Language Online. Vancouver, Canada: Association for Computational Linguistics, pp.52-56.',
|
||||
},
|
||||
},
|
||||
{
|
||||
_id: '96649d05d8193f4333b10015af76c6562971bd8c',
|
||||
url_path: 'dataset-3',
|
||||
file_path: 'content/dataset-3/index.md',
|
||||
metadata: {
|
||||
title: 'CoRAL: a Context-aware Croatian Abusive Language Dataset',
|
||||
'link-to-publication':
|
||||
'https://aclanthology.org/2022.findings-aacl.21/',
|
||||
'link-to-data':
|
||||
'https://github.com/shekharRavi/CoRAL-dataset-Findings-of-the-ACL-AACL-IJCNLP-2022',
|
||||
'task-description':
|
||||
'Multi-class based on context dependency categories (CDC)',
|
||||
'details-of-task': 'Detectioning CDC from abusive comments',
|
||||
'size-of-dataset': 2240,
|
||||
'percentage-abusive': 100,
|
||||
language: 'Croatian',
|
||||
'level-of-annotation': ['Posts'],
|
||||
platform: ['Posts'],
|
||||
medium: ['Newspaper Comments'],
|
||||
reference:
|
||||
'Ravi Shekhar, Mladen Karan and Matthew Purver (2022). CoRAL: a Context-aware Croatian Abusive Language Dataset. Findings of the ACL: AACL-IJCNLP.',
|
||||
},
|
||||
},
|
||||
],
|
||||
facets: ['language', 'platform']
|
||||
},
|
||||
};
|
||||
;
|
||||
Reference in New Issue
Block a user