Resources & Publications

All members of the network can share their recent work on media bias here.
Resources
The most recent models are published on Hugging Face
[Benchmark, GitHub] MBIB – the first Media Bias Identification Benchmark Task and Dataset Collection
[Dataset, GitHub] BABE – Bias Annotations By Experts
[Scale/Questionnaire to measure bias perception] Do You Think It’s Biased? How To Ask For The Perception Of Media Bias (A set of tested questions to assess media bias perception to be used in any bias-related research)
[Dataset, Zenodo] MBIC – A Media Bias Annotation Dataset Including Annotator Characteristics
Publications
2022
Zhukova, Anastasia; Hamborg, Felix; Donnay, Karsten; Gipp, Bela
XCoref: Cross-Document Coreference Resolution in the Wild Proceedings Article
In: Information for a Better World: Shaping the Global Future: 17th International Conference, IConference 2022, Virtual Event, February 28 – March 4, 2022, Proceedings, Part I, pp. 272–291, Springer-Verlag, Berlin, Heidelberg, 2022, ISBN: 978-3-030-96956-1.
Abstract | Links | BibTeX | Tags: Cross-document coreference resolution, media bias, news analysis
@inproceedings{zhukova2022,
  author     = {Zhukova, Anastasia and Hamborg, Felix and Donnay, Karsten and Gipp, Bela},
  title      = {{XCoref}: Cross-Document Coreference Resolution in the Wild},
  booktitle  = {Information for a Better World: Shaping the Global Future: 17th International Conference, IConference 2022, Virtual Event, February 28 -- March 4, 2022, Proceedings, Part I},
  pages      = {272--291},
  publisher  = {Springer-Verlag},
  address    = {Berlin, Heidelberg},
  year       = {2022},
  date       = {2022-01-01},
  isbn       = {978-3-030-96956-1},
  doi        = {10.1007/978-3-030-96957-8_25},
  url        = {https://doi.org/10.1007/978-3-030-96957-8_25},
  abstract   = {Datasets and methods for cross-document coreference resolution (CDCR) focus on events or entities with strict coreference relations. They lack, however, annotating and resolving coreference mentions with more abstract or loose relations that may occur when news articles report about controversial and polarized events. Bridging and loose coreference relations trigger associations that may expose news readers to bias by word choice and labeling. For example, coreferential mentions of “direct talks between U.S. President Donald Trump and Kim” such as “an extraordinary meeting following months of heated rhetoric” or “great chance to solve a world problem” form a more positive perception of this event. A step towards bringing awareness of bias by word choice and labeling is the reliable resolution of coreferences with high lexical diversity. We propose an unsupervised method named XCoref, which is a CDCR method that capably resolves not only previously prevalent entities, such as persons, e.g., “Donald Trump,” but also abstractly defined concepts, such as groups of persons, “caravan of immigrants,” events and actions, e.g., “marching to the U.S. border.” In an extensive evaluation, we compare the proposed XCoref to a state-of-the-art CDCR method and a previous method TCA that resolves such complex coreference relations and find that XCoref outperforms these methods. Outperforming an established CDCR model shows that the new CDCR models need to be evaluated on semantically complex mentions with more loose coreference relations to indicate their applicability of models to resolve mentions in the “wild” of political news articles.},
  keywords   = {Cross-document coreference resolution, media bias, news analysis},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
2021
Spinde, Timo
An Interdisciplinary Approach for the Automated Detection and Visualization of Media Bias in News Articles Proceedings Article
In: 2021 IEEE International Conference on Data Mining Workshops (ICDMW), 2021.
Links | BibTeX | Tags: media bias, news analysis, slanted coverage, text retrieval
@inproceedings{spinde2021g,
  author     = {Timo Spinde},
  title      = {An Interdisciplinary Approach for the Automated Detection and Visualization of Media Bias in News Articles},
  booktitle  = {2021 IEEE International Conference on Data Mining Workshops (ICDMW)},
  year       = {2021},
  date       = {2021-09-30},
  doi        = {10.1109/ICDMW53433.2021.00144},
  url        = {https://media-bias-research.org/wp-content/uploads/2021/09/Spinde2021g.pdf},
  urldate    = {2021-09-30},
  keywords   = {media bias, news analysis, slanted coverage, text retrieval},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
Spinde, Timo; Rudnitckaia, Lada; Mitrović, Jelena; Hamborg, Felix; Granitzer, Michael; Gipp, Bela; Donnay, Karsten
Automated identification of bias inducing words in news articles using linguistic and context-oriented features Journal Article
In: Information Processing & Management, vol. 58, no. 3, pp. 102505, 2021, ISSN: 0306-4573.
Abstract | Links | BibTeX | Tags: bias data set, context analysis, feature engineering, media bias, news analysis, text analysis
@article{SPINDE2021102505,
  author     = {Timo Spinde and Lada Rudnitckaia and Jelena Mitrović and Felix Hamborg and Michael Granitzer and Bela Gipp and Karsten Donnay},
  title      = {Automated identification of bias inducing words in news articles using linguistic and context-oriented features},
  journal    = {Information Processing \& Management},
  volume     = {58},
  number     = {3},
  pages      = {102505},
  year       = {2021},
  date       = {2021-01-01},
  issn       = {0306-4573},
  doi        = {10.1016/j.ipm.2021.102505},
  url        = {https://www.sciencedirect.com/science/article/pii/S0306457321000157/pdfft?md5=64e81212b3bfa861d01a6fe3d5b979c3&pid=1-s2.0-S0306457321000157-main.pdf},
  abstract   = {Media has a substantial impact on public perception of events, and, accordingly, the way media presents events can potentially alter the beliefs and views of the public. One of the ways in which bias in news articles can be introduced is by altering word choice. Such a form of bias is very challenging to identify automatically due to the high context-dependence and the lack of a large-scale gold-standard data set. In this paper, we present a prototypical yet robust and diverse data set for media bias research. It consists of 1,700 statements representing various media bias instances and contains labels for media bias identification on the word and sentence level. In contrast to existing research, our data incorporate background information on the participants’ demographics, political ideology, and their opinion about media in general. Based on our data, we also present a way to detect bias-inducing words in news articles automatically. Our approach is feature-oriented, which provides a strong descriptive and explanatory power compared to deep learning techniques. We identify and engineer various linguistic, lexical, and syntactic features that can potentially be media bias indicators. Our resource collection is the most complete within the media bias research area to the best of our knowledge. We evaluate all of our features in various combinations and retrieve their possible importance both for future research and for the task in general. We also evaluate various possible Machine Learning approaches with all of our features. XGBoost, a decision tree implementation, yields the best results. Our approach achieves an F1-score of 0.43, a precision of 0.29, a recall of 0.77, and a ROC AUC of 0.79, which outperforms current media bias detection methods based on features. We propose future improvements, discuss the perspectives of the feature-based approach and a combination of neural networks and deep learning with our current system.},
  keywords   = {bias data set, context analysis, feature engineering, media bias, news analysis, text analysis},
  pubstate   = {published},
  tppubtype  = {article}
}
Zhukova, Anastasia; Hamborg, Felix; Donnay, Karsten; Gipp, Bela
Concept Identification of Directly and Indirectly Related Mentions Referring to Groups of Persons Proceedings Article
In: Diversity, Divergence, Dialogue: 16th International Conference, IConference 2021, Beijing, China, March 17–31, 2021, Proceedings, Part I, pp. 514–526, Springer-Verlag, Beijing, China, 2021, ISBN: 978-3-030-71291-4.
Abstract | Links | BibTeX | Tags: Coreference resolution, media bias, news analysis
@inproceedings{zhukova2021,
  author     = {Zhukova, Anastasia and Hamborg, Felix and Donnay, Karsten and Gipp, Bela},
  title      = {Concept Identification of Directly and Indirectly Related Mentions Referring to Groups of Persons},
  booktitle  = {Diversity, Divergence, Dialogue: 16th International Conference, IConference 2021, Beijing, China, March 17--31, 2021, Proceedings, Part I},
  pages      = {514--526},
  publisher  = {Springer-Verlag},
  address    = {Berlin, Heidelberg},
  year       = {2021},
  date       = {2021-01-01},
  isbn       = {978-3-030-71291-4},
  doi        = {10.1007/978-3-030-71292-1_40},
  url        = {https://doi.org/10.1007/978-3-030-71292-1_40},
  abstract   = {Unsupervised concept identification through clustering, i.e., identification of semantically related words and phrases, is a common approach to identify contextual primitives employed in various use cases, e.g., text dimension reduction, i.e., replace words with the concepts to reduce the vocabulary size, summarization, and named entity resolution. We demonstrate the first results of an unsupervised approach for the identification of groups of persons as actors extracted from a set of related articles. Specifically, the approach clusters mentions of groups of persons that act as non-named entity actors in the texts, e.g., “migrant families” = “asylum-seekers.” Compared to our baseline, the approach keeps the mentions of the geopolitical entities separated, e.g., “Iran leaders” ≠ “European leaders,” and clusters (in)directly related mentions with diverse wording, e.g., “American officials” = “Trump Administration.”},
  keywords   = {Coreference resolution, media bias, news analysis},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}