[alan-turing][m] - individual pages
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
---
|
||||
title: "Abusive Language Detection on Arabic Social Media (Al Jazeera)"
|
||||
link-to-publication: https://www.aclweb.org/anthology/W17-3008
|
||||
link-to-data: http://alt.qcri.org/~hmubarak/offensive/AJCommentsClassification-CF.xlsx
|
||||
task-description: Ternary (Obscene, Offensive but not obscene, Clean)
|
||||
details-of-task: Incivility
|
||||
size-of-dataset: 32000
|
||||
percentage-abusive: 0.81
|
||||
language: Arabic
|
||||
level-of-annotation: ["Posts"]
|
||||
platform: ["AlJazeera"]
|
||||
medium: ["Text"]
|
||||
reference: "Mubarak, H., Darwish, K. and Magdy, W., 2017. Abusive Language Detection on Arabic Social Media. In: Proceedings of the First Workshop on Abusive Language Online. Vancouver, Canada: Association for Computational Linguistics, pp.52-56."
|
||||
---
|
||||
|
||||
SOMETHING TEST
|
||||
@@ -0,0 +1,14 @@
|
||||
---
|
||||
title: Detecting Abusive Albanian
|
||||
link-to-publication: https://arxiv.org/abs/2107.13592
|
||||
link-to-data: https://doi.org/10.6084/m9.figshare.19333298.v1
|
||||
task-description: Hierarchical (offensive/not; untargeted/targeted; person/group/other)
|
||||
details-of-task: Detect and categorise abusive language in social media data
|
||||
size-of-dataset: 11874
|
||||
percentage-abusive: 13.2
|
||||
language: Albanian
|
||||
level-of-annotation: ["Posts"]
|
||||
platform: ["Instagram", "Youtube"]
|
||||
medium: ["Text"]
|
||||
reference: "Nurce, E., Keci, J., Derczynski, L., 2021. Detecting Abusive Albanian. arXiv:2107.13592"
|
||||
---
|
||||
@@ -0,0 +1,15 @@
|
||||
---
|
||||
title: "Hate Speech Detection in the Bengali language: A Dataset and its Baseline Evaluation"
|
||||
link-to-publication: https://arxiv.org/pdf/2012.09686.pdf
|
||||
link-to-data: https://www.kaggle.com/naurosromim/bengali-hate-speech-dataset
|
||||
task-description: Binary (hateful, not)
|
||||
details-of-task: "Several categories: sports, entertainment, crime, religion, politics, celebrity and meme"
|
||||
size-of-dataset: 30000
|
||||
percentage-abusive: 0.33
|
||||
language: Bengali
|
||||
level-of-annotation: ["Posts"]
|
||||
platform: ["Youtube", "Facebook"]
|
||||
medium: ["Text"]
|
||||
reference: "Romim, N., Ahmed, M., Talukder, H., & Islam, M. S. (2021). Hate speech detection in the Bengali language: A dataset and its baseline evaluation. In Proceedings of International Joint Conference on Advances in Computational Intelligence (pp. 457-468). Springer, Singapore."
|
||||
---
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
---
|
||||
title: "Let-Mi: An Arabic Levantine Twitter Dataset for Misogynistic Language"
|
||||
link-to-publication: https://arxiv.org/abs/2103.10195
|
||||
link-to-data: https://drive.google.com/file/d/1mM2vnjsy7QfUmdVUpKqHRJjZyQobhTrW/view
|
||||
task-description: Binary (misogyny/none) and Multi-class (none, discredit, derailing, dominance, stereotyping & objectification, threat of violence, sexual harassment, damning)
|
||||
details-of-task: Introducing an Arabic Levantine Twitter dataset for Misogynistic language
|
||||
size-of-dataset: 6603
|
||||
percentage-abusive: 48.76
|
||||
language: Arabic
|
||||
level-of-annotation: ["Posts"]
|
||||
platform: ["Twitter"]
|
||||
medium: ["Text", "Images"]
|
||||
reference: "Hala Mulki and Bilal Ghanem. 2021. Let-Mi: An Arabic Levantine Twitter Dataset for Misogynistic Language. In Proceedings of the Sixth Arabic Natural Language Processing Workshop, pages 154–163, Kyiv, Ukraine (Virtual). Association for Computational Linguistics."
|
||||
---
|
||||
Reference in New Issue
Block a user