{
  "@context": "https://schema.org",
  "@graph": [
    {
      "@id": "https://iamshnoo.github.io/#person",
      "@type": "Person",
      "name": "Anjishnu Mukherjee",
      "url": "https://iamshnoo.github.io/",
      "image": "https://iamshnoo.github.io/images/optimized/profile/profile-560.webp",
      "jobTitle": "Computer Science Ph.D. Candidate",
      "affiliation": {
        "@type": "CollegeOrUniversity",
        "name": "George Mason University"
      },
      "sameAs": [
        "https://github.com/iamshnoo/",
        "https://www.linkedin.com/in/anjishnumukherjee/",
        "https://scholar.google.com/citations?user=3849YpIAAAAJ&hl=en",
        "https://iamshnoo.github.io/cv/"
      ],
      "knowsAbout": [
        "Cultural adaptation",
        "Social bias",
        "Multilingual NLP",
        "Multimodal evaluation"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#website",
      "@type": "WebSite",
      "name": "Anjishnu Mukherjee",
      "url": "https://iamshnoo.github.io/",
      "author": {
        "@id": "https://iamshnoo.github.io/#person"
      },
      "dateModified": "2026-05-02"
    },
    {
      "@id": "https://iamshnoo.github.io/",
      "@type": "ProfilePage",
      "name": "Anjishnu Mukherjee",
      "url": "https://iamshnoo.github.io/",
      "about": {
        "@id": "https://iamshnoo.github.io/#person"
      },
      "mainEntity": {
        "@id": "https://iamshnoo.github.io/#person"
      },
      "dateModified": "2026-05-02"
    },
    {
      "@type": "ItemList",
      "name": "Anjishnu Mukherjee updates",
      "itemListElement": [
        {
          "@type": "ListItem",
          "position": 1,
          "item": {
            "@type": "CreativeWork",
            "name": "Will be joining my third Applied Science internship at Amazon.",
            "datePublished": "2026-05",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 2,
          "item": {
            "@type": "CreativeWork",
            "name": "Jinhao's paper on \"Knowing Bias, Doing Better\" was accepted to ICML 2026 as a poster.",
            "datePublished": "2026-05",
            "url": "https://iamshnoo.github.io/#knowbias",
            "about": {
              "@id": "https://iamshnoo.github.io/#knowbias"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 3,
          "item": {
            "@type": "CreativeWork",
            "name": "Rinki's paper on measuring South Asian biases was accepted to TrustNLP at ACL 2026.",
            "datePublished": "2026-05",
            "url": "https://iamshnoo.github.io/#south-asian-biases",
            "about": {
              "@id": "https://iamshnoo.github.io/#south-asian-biases"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 4,
          "item": {
            "@type": "CreativeWork",
            "name": "Passed my Ph.D. proposal defense and advanced to candidacy.",
            "datePublished": "2026-04",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 5,
          "item": {
            "@type": "CreativeWork",
            "name": "Completed my M.S. in Computer Science with an Outstanding Academic Achievement award.",
            "datePublished": "2025-12",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 6,
          "item": {
            "@type": "CreativeWork",
            "name": "My first mentee, Mamnuya Rinki, defended her M.S. thesis.",
            "datePublished": "2025-11",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 7,
          "item": {
            "@type": "CreativeWork",
            "name": "Presented \"Crossroads of Continents\" at WACV 2025 as an oral and poster.",
            "datePublished": "2025-03",
            "url": "https://iamshnoo.github.io/#crossroads",
            "about": {
              "@id": "https://iamshnoo.github.io/#crossroads"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 8,
          "item": {
            "@type": "CreativeWork",
            "name": "Presented \"BiasDora\" at EMNLP Findings 2024.",
            "datePublished": "2024-11",
            "url": "https://iamshnoo.github.io/#biasdora",
            "about": {
              "@id": "https://iamshnoo.github.io/#biasdora"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 9,
          "item": {
            "@type": "CreativeWork",
            "name": "Recognized as an Outstanding Reviewer for EMNLP 2024.",
            "datePublished": "2024-11",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 10,
          "item": {
            "@type": "CreativeWork",
            "name": "Gave an invited diffusion models tutorial for Advanced NLP at George Mason.",
            "datePublished": "2024-11",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 11,
          "item": {
            "@type": "CreativeWork",
            "name": "Gave an invited talk on multilingual socio-cultural biases at the University of Toronto.",
            "datePublished": "2024-09",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 12,
          "item": {
            "@type": "CreativeWork",
            "name": "Presented \"Global Gallery\" at NAACL 2024.",
            "datePublished": "2024-06",
            "url": "https://iamshnoo.github.io/#global-gallery",
            "about": {
              "@id": "https://iamshnoo.github.io/#global-gallery"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 13,
          "item": {
            "@type": "CreativeWork",
            "name": "Presented \"Global Gallery\" at MASC-SLL at Johns Hopkins.",
            "datePublished": "2024-05",
            "url": "https://iamshnoo.github.io/#global-gallery",
            "about": {
              "@id": "https://iamshnoo.github.io/#global-gallery"
            }
          }
        },
        {
          "@type": "ListItem",
          "position": 14,
          "item": {
            "@type": "CreativeWork",
            "name": "Gave an invited talk at the University of Notre Dame NLP seminar.",
            "datePublished": "2024-02",
            "url": "https://iamshnoo.github.io/"
          }
        },
        {
          "@type": "ListItem",
          "position": 15,
          "item": {
            "@type": "CreativeWork",
            "name": "Presented \"Global Voices, Local Biases\" at EMNLP 2023.",
            "datePublished": "2023-12",
            "url": "https://iamshnoo.github.io/#global-voices",
            "about": {
              "@id": "https://iamshnoo.github.io/#global-voices"
            }
          }
        }
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#knowbias",
      "@type": "ScholarlyArticle",
      "name": "Knowing Bias, Doing Better: Mitigating Social Bias in LLMs via Know-Bias Neuron Enhancement",
      "url": "https://iamshnoo.github.io/#knowbias",
      "mainEntityOfPage": "https://iamshnoo.github.io/#knowbias",
      "description": "Large language models (LLMs) exhibit social biases that reinforce harmful stereotypes, limiting their safe deployment. Most existing debiasing methods adopt a suppressive paradigm by modifying parameters, prompts, or neurons associated with biased behavior; however, such approaches are often brittle, weakly generalizable, data-inefficient, and prone to degrading general capability. We propose KnowBias, a lightweight and conceptually distinct framework that mitigates bias by strengthening, rather than suppressing, neurons encoding bias-knowledge. KnowBias identifies neurons encoding bias knowledge using a small set of bias-knowledge questions via attribution-based analysis, and selectively enhances them at inference time. This design enables strong debiasing while preserving general capabilities, generalizes across bias types and demographics, and is highly data efficient, requiring only a handful of simple yes/no questions and no retraining. Experiments across multiple benchmarks and LLMs demonstrate consistent state-of-the-art debiasing performance with minimal utility degradation.",
      "citation": "@inproceedings{pan2026knowing,\n  title = {Knowing Bias, Doing Better: Mitigating Social Bias in LLMs via Know-Bias Neuron Enhancement},\n  author = {Pan, Jinhao and Raj, Chahat and Mukherjee, Anjishnu and Mansouri, Sina and Wei, Bowen and Yada, Shloka and Zhu, Ziwei},\n  booktitle = {Proceedings of the 43rd International Conference on Machine Learning},\n  year = {2026},\n  url = {https://arxiv.org/abs/2601.21864}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2601.21864"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Jinhao Pan"
        },
        {
          "@type": "Person",
          "name": "Chahat Raj"
        },
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Sina Mansouri"
        },
        {
          "@type": "Person",
          "name": "Bowen Wei"
        },
        {
          "@type": "Person",
          "name": "Shloka Yada"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        }
      ],
      "datePublished": "2026",
      "image": "https://iamshnoo.github.io/images/optimized/full/knowbias-full.ed19b118a6.webp",
      "isPartOf": "ICML '26 (Poster)",
      "sameAs": [
        "https://arxiv.org/pdf/2601.21864",
        "https://github.com/JP-25/KnowBias"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#metadata-localization",
      "@type": "ScholarlyArticle",
      "name": "Metadata Conditioned Large Language Models for Localization",
      "url": "https://iamshnoo.github.io/metadata_localization/",
      "mainEntityOfPage": "https://iamshnoo.github.io/#metadata-localization",
      "subjectOf": {
        "@type": "WebPage",
        "@id": "https://iamshnoo.github.io/metadata_localization/",
        "url": "https://iamshnoo.github.io/metadata_localization/",
        "name": "Metadata Conditioned Large Language Models for Localization project page"
      },
      "description": "Large language models are typically trained by treating text as a single global distribution, often resulting in geographically homogenized behavior. We study metadata conditioning as a lightweight approach for localization, pre-training 31 models (at 0.5B and 1B parameter scales) from scratch on large-scale English news data annotated with verified URLs, country tags, and continent tags, covering 4 continents and 17 countries. Across four controlled experiments, we show that metadata conditioning consistently improves in-region performance without sacrificing cross-region generalization, enables global models to recover localization comparable to region-specific models, and improves learning efficiency. Our ablation studies demonstrate that URL-level metadata alone captures much of the geographic signal, while balanced regional data coverage remains essential, as metadata cannot fully compensate for missing regions. Finally, we introduce a downstream benchmark of 800 localized news MCQs and show that after instruction tuning, metadata conditioned global models achieve accuracy comparable to LLaMA-3.2-1B-Instruct, despite being trained on substantially less data. Together, these results establish metadata conditioning as a practical and compute-efficient approach for localization of language models.",
      "citation": "@misc{mukherjee2026metadata,\n  title = {Metadata Conditioned Large Language Models for Localization},\n  author = {Mukherjee, Anjishnu and Zhu, Ziwei and Anastasopoulos, Antonios},\n  year = {2026},\n  eprint = {2601.15236},\n  archivePrefix = {arXiv},\n  primaryClass = {cs.CL},\n  url = {https://arxiv.org/abs/2601.15236}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2601.15236"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        }
      ],
      "datePublished": "2026",
      "image": "https://iamshnoo.github.io/images/optimized/full/metadata-localization-full.7d52c6b370.webp",
      "isPartOf": "ArXiv",
      "sameAs": [
        "https://arxiv.org/pdf/2601.15236",
        "https://github.com/iamshnoo/metadata_localization",
        "https://iamshnoo.github.io/metadata_localization/"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#south-asian-biases",
      "@type": "ScholarlyArticle",
      "name": "Measuring South Asian Biases in Large Language Models",
      "url": "https://iamshnoo.github.io/#south-asian-biases",
      "mainEntityOfPage": "https://iamshnoo.github.io/#south-asian-biases",
      "description": "Evaluations of Large Language Models (LLMs) often overlook intersectional and culturally specific biases, particularly in underrepresented multilingual regions like South Asia. This work addresses these gaps by conducting a multilingual and intersectional analysis of LLM outputs across 10 Indo-Aryan and Dravidian languages, identifying how cultural stigmas influenced by purdah and patriarchy are reinforced in generative tasks. We construct a culturally grounded bias lexicon capturing previously unexplored intersectional dimensions including gender, religion, marital status, and number of children. We use our lexicon to quantify intersectional bias and the effectiveness of self-debiasing in open-ended generations (e.g., storytelling, hobbies, and to-do lists), where bias manifests subtly and remains largely unexamined in multilingual contexts. Finally, we evaluate two self-debiasing strategies (simple and complex prompts) to measure their effectiveness in reducing culturally specific bias in Indo-Aryan and Dravidian languages. Our approach offers a nuanced lens into cultural bias by introducing a novel bias lexicon and evaluation framework that extends beyond Eurocentric or small-scale multilingual settings.",
      "citation": "@inproceedings{rinki2026measuring,\n  title = {Measuring South Asian Biases in Large Language Models},\n  author = {Rinki, Mamnuya and Raj, Chahat and Mukherjee, Anjishnu and Zhu, Ziwei},\n  booktitle = {Proceedings of the TrustNLP Workshop at ACL},\n  year = {2026},\n  url = {https://arxiv.org/abs/2505.18466}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2505.18466"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Mamnuya Rinki"
        },
        {
          "@type": "Person",
          "name": "Chahat Raj"
        },
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        }
      ],
      "datePublished": "2026",
      "image": "https://iamshnoo.github.io/images/optimized/full/south-asian-biases-full.ac508bf79a.webp",
      "isPartOf": "TrustNLP @ ACL 2026 (Poster)",
      "sameAs": [
        "https://arxiv.org/pdf/2505.18466",
        "https://github.com/mamnuya/purdah_and_patriarchy"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#crossroads",
      "@type": "ScholarlyArticle",
      "name": "Crossroads of Continents: Automated Artifact Extraction for Cultural Adaptation with Large Multimodal Models",
      "url": "https://iamshnoo.github.io/crossroads_wacv_25/",
      "mainEntityOfPage": "https://iamshnoo.github.io/#crossroads",
      "subjectOf": {
        "@type": "WebPage",
        "@id": "https://iamshnoo.github.io/crossroads_wacv_25/",
        "url": "https://iamshnoo.github.io/crossroads_wacv_25/",
        "name": "Crossroads of Continents: Automated Artifact Extraction for Cultural Adaptation with Large Multimodal Models project page"
      },
      "description": "We present a comprehensive three-phase study to examine (1) the cultural understanding of Large Multimodal Models (LMMs) by introducing DalleStreet, a large-scale dataset generated by DALL-E 3 and validated by humans, containing 9,935 images of 67 countries and 10 concept classes; (2) the underlying implicit and potentially stereotypical cultural associations with a cultural artifact extraction task; and (3) an approach to adapt cultural representation in an image based on extracted associations using a modular pipeline, CultureAdapt. We find disparities in cultural understanding at geographic sub-region levels with both open-source (LLaVA) and closed-source (GPT-4V) models on DalleStreet and other existing benchmarks, which we try to understand using over 18,000 artifacts that we identify in association to different countries. Our findings reveal a nuanced picture of the cultural competence of LMMs, highlighting the need to develop culture-aware systems.",
      "citation": "@InProceedings{Mukherjee_2025_WACV,\n  author = {Mukherjee, Anjishnu and Zhu, Ziwei and Anastasopoulos, Antonios},\n  title = {Crossroads of Continents: Automated Artifact Extraction for Cultural Adaptation with Large Multimodal Models},\n  booktitle = {Proceedings of the Winter Conference on Applications of Computer Vision (WACV)},\n  month = {February},\n  year = {2025},\n  pages = {1755-1764}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2407.02067"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        }
      ],
      "datePublished": "2025",
      "image": "https://iamshnoo.github.io/images/optimized/full/crossroads-full.d4bae18736.webp",
      "isPartOf": "WACV '25 (Oral and Poster)",
      "sameAs": [
        "https://www.arxiv.org/pdf/2407.02067",
        "https://github.com/iamshnoo/crossroads",
        "https://iamshnoo.github.io/crossroads_wacv_25/"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#biasdora",
      "@type": "ScholarlyArticle",
      "name": "BiasDora: Exploring Hidden Biased Associations in Vision-Language Models",
      "url": "https://iamshnoo.github.io/#biasdora",
      "mainEntityOfPage": "https://iamshnoo.github.io/#biasdora",
      "description": "Existing works examining Vision-Language Models (VLMs) for social biases predominantly focus on a limited set of documented bias associations, such as gender:profession or race:crime. This narrow scope often overlooks a vast range of unexamined implicit associations, restricting the identification and, hence, mitigation of such biases. We address this gap by probing VLMs to (1) uncover hidden, implicit associations across 9 bias dimensions. We systematically explore diverse input and output modalities and (2) demonstrate how biased associations vary in their negativity, toxicity, and extremity. Our work (3) identifies subtle and extreme biases that are typically not recognized by existing methodologies. We make the Dataset of retrieved associations, Dora, publicly available.",
      "citation": "@inproceedings{raj-etal-2024-biasdora,\n  title = {{B}ias{D}ora: Exploring Hidden Biased Associations in Vision-Language Models},\n  author = {Raj, Chahat and Mukherjee, Anjishnu and Caliskan, Aylin and Anastasopoulos, Antonios and Zhu, Ziwei},\n  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},\n  month = {November},\n  year = {2024},\n  address = {Miami, Florida, USA},\n  publisher = {Association for Computational Linguistics},\n  url = {https://aclanthology.org/2024.findings-emnlp.611/},\n  doi = {10.18653/v1/2024.findings-emnlp.611},\n  pages = {10439--10455}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2407.02066"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Chahat Raj"
        },
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Aylin Caliskan"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        }
      ],
      "datePublished": "2024",
      "image": "https://iamshnoo.github.io/images/optimized/full/biasdora-full.01ecb43218.webp",
      "isPartOf": "EMNLP Findings '24 (Poster)",
      "sameAs": [
        "https://www.arxiv.org/pdf/2407.02066",
        "https://github.com/chahatraj/BiasDora"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#breaking-bias",
      "@type": "ScholarlyArticle",
      "name": "Breaking Bias, Building Bridges: Evaluation and Mitigation of Social Biases in LLMs via Contact Hypothesis",
      "url": "https://iamshnoo.github.io/breaking_bias_building_bridges/",
      "mainEntityOfPage": "https://iamshnoo.github.io/#breaking-bias",
      "subjectOf": {
        "@type": "WebPage",
        "@id": "https://iamshnoo.github.io/breaking_bias_building_bridges/",
        "url": "https://iamshnoo.github.io/breaking_bias_building_bridges/",
        "name": "Breaking Bias, Building Bridges: Evaluation and Mitigation of Social Biases in LLMs via Contact Hypothesis project page"
      },
      "description": "Large Language Models (LLMs) perpetuate social biases, reflecting prejudices in their training data and reinforcing societal stereotypes and inequalities. Our work explores the potential of the Contact Hypothesis, a concept from social psychology for debiasing LLMs. We simulate various forms of social contact through LLM prompting to measure their influence on the model's biases, mirroring how intergroup interactions can reduce prejudices in social contexts. We create a dataset of 108,000 prompts following a principled approach replicating social contact to measure biases in three LLMs (LLaMA 2, Tulu, and NousHermes) across 13 social bias dimensions. We propose a unique debiasing technique, Social Contact Debiasing (SCD), that instruction-tunes these models with unbiased responses to prompts. Our research demonstrates that LLM responses exhibit social biases when subject to contact probing, but more importantly, these biases can be significantly reduced by up to 40% in 1 epoch of instruction tuning LLaMA 2 following our SCD strategy.",
      "citation": "@article{raj2024breaking,\n  title = {Breaking Bias, Building Bridges: Evaluation and Mitigation of Social Biases in LLMs via Contact Hypothesis},\n  author = {Raj, Chahat and Mukherjee, Anjishnu and Caliskan, Aylin and Anastasopoulos, Antonios and Zhu, Ziwei},\n  journal = {Proceedings of the AAAI/ACM Conference on AI, Ethics, and Society},\n  volume = {7},\n  number = {1},\n  pages = {1180--1189},\n  year = {2024},\n  doi = {10.1609/aies.v7i1.31715},\n  url = {https://doi.org/10.1609/aies.v7i1.31715}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "arXiv",
          "value": "2407.02030"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Chahat Raj*"
        },
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee*"
        },
        {
          "@type": "Person",
          "name": "Aylin Caliskan"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        }
      ],
      "datePublished": "2024",
      "image": "https://iamshnoo.github.io/images/optimized/full/breaking-bias-full.0c2153a675.webp",
      "isPartOf": "AIES '24 (Oral)",
      "sameAs": [
        "https://arxiv.org/pdf/2407.02030",
        "https://github.com/chahatraj/breakingbias",
        "https://iamshnoo.github.io/breaking_bias_building_bridges/"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#global-gallery",
      "@type": "ScholarlyArticle",
      "name": "Global Gallery: The Fine Art of Painting Culture Portraits through Multilingual Instruction Tuning",
      "url": "https://iamshnoo.github.io/global_gallery/",
      "mainEntityOfPage": "https://iamshnoo.github.io/#global-gallery",
      "subjectOf": {
        "@type": "WebPage",
        "@id": "https://iamshnoo.github.io/global_gallery/",
        "url": "https://iamshnoo.github.io/global_gallery/",
        "name": "Global Gallery: The Fine Art of Painting Culture Portraits through Multilingual Instruction Tuning project page"
      },
      "description": "Exploring the intersection of language and culture in Large Language Models (LLMs), this study critically examines their capability to encapsulate cultural nuances across diverse linguistic landscapes. Central to our investigation are three research questions: the efficacy of language-specific instruction tuning, the impact of pretraining on dominant language data, and the identification of optimal approaches to elicit accurate cultural knowledge from LLMs. Utilizing the GeoMLaMA benchmark for multilingual commonsense knowledge and an adapted CAMeL dataset (English-only) for evaluation of nuanced cultural aspects, our experiments span six different languages and cultural contexts, revealing the extent of LLMs' cultural awareness. Our findings highlight a nuanced landscape: while language-specific tuning and bilingual pretraining enhance cultural understanding in certain contexts, they also uncover inconsistencies and biases, particularly in non-Western cultures. This work expands our understanding of LLMs' cultural competence and emphasizes the importance of integrating diverse cultural perspectives in their development, aiming for a more globally representative and equitable approach in language modeling.",
      "citation": "@inproceedings{mukherjee-etal-2024-global,\n  title = {Global Gallery: The Fine Art of Painting Culture Portraits through Multilingual Instruction Tuning},\n  author = {Mukherjee, Anjishnu and Caliskan, Aylin and Zhu, Ziwei and Anastasopoulos, Antonios},\n  booktitle = {Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},\n  month = {June},\n  year = {2024},\n  address = {Mexico City, Mexico},\n  publisher = {Association for Computational Linguistics},\n  url = {https://aclanthology.org/2024.naacl-long.355/},\n  doi = {10.18653/v1/2024.naacl-long.355},\n  pages = {6398--6415}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "ACL Anthology",
          "value": "2024.naacl-long.355"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee"
        },
        {
          "@type": "Person",
          "name": "Aylin Caliskan"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        }
      ],
      "datePublished": "2024",
      "image": "https://iamshnoo.github.io/images/optimized/full/global-gallery-full.58e0cf6822.webp",
      "isPartOf": "NAACL '24 (Poster)",
      "sameAs": [
        "https://aclanthology.org/2024.naacl-long.355.pdf",
        "https://github.com/iamshnoo/culture-llm",
        "https://iamshnoo.github.io/global_gallery/"
      ]
    },
    {
      "@id": "https://iamshnoo.github.io/#global-voices",
      "@type": "ScholarlyArticle",
      "name": "Global Voices, Local Biases: Socio-cultural Prejudices across Languages",
      "url": "https://iamshnoo.github.io/global_voices_local_biases/",
      "mainEntityOfPage": "https://iamshnoo.github.io/#global-voices",
      "subjectOf": {
        "@type": "WebPage",
        "@id": "https://iamshnoo.github.io/global_voices_local_biases/",
        "url": "https://iamshnoo.github.io/global_voices_local_biases/",
        "name": "Global Voices, Local Biases: Socio-cultural Prejudices across Languages project page"
      },
      "description": "Human biases are ubiquitous but not uniform: disparities exist across linguistic, cultural, and societal borders. As large amounts of recent literature suggest, language models (LMs) trained on human data can reflect and often amplify the effects of these social biases. However, the vast majority of existing studies on bias are heavily skewed towards Western and European languages. In this work, we scale the Word Embedding Association Test (WEAT) to 24 languages, enabling broader studies and yielding interesting findings about LM bias. We additionally enhance this data with culturally relevant information for each language, capturing local contexts on a global scale. Further, to encompass more widely prevalent societal biases, we examine new bias dimensions across toxicity, ableism, and more. Moreover, we delve deeper into the Indian linguistic landscape, conducting a comprehensive regional bias analysis across six prevalent Indian languages. Finally, we highlight the significance of these social biases and the new dimensions through an extensive comparison of embedding methods, reinforcing the need to address them in pursuit of more equitable language models.",
      "citation": "@inproceedings{mukherjee-etal-2023-global,\n  title = {{G}lobal {V}oices, Local Biases: Socio-Cultural Prejudices across Languages},\n  author = {Mukherjee, Anjishnu and Raj, Chahat and Zhu, Ziwei and Anastasopoulos, Antonios},\n  booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},\n  month = {December},\n  year = {2023},\n  address = {Singapore},\n  publisher = {Association for Computational Linguistics},\n  url = {https://aclanthology.org/2023.emnlp-main.981/},\n  doi = {10.18653/v1/2023.emnlp-main.981},\n  pages = {15828--15845}\n}",
      "identifier": [
        {
          "@type": "PropertyValue",
          "propertyID": "ACL Anthology",
          "value": "2023.emnlp-main.981"
        }
      ],
      "author": [
        {
          "@type": "Person",
          "name": "Anjishnu Mukherjee*"
        },
        {
          "@type": "Person",
          "name": "Chahat Raj*"
        },
        {
          "@type": "Person",
          "name": "Ziwei Zhu"
        },
        {
          "@type": "Person",
          "name": "Antonios Anastasopoulos"
        }
      ],
      "datePublished": "2023",
      "image": "https://iamshnoo.github.io/images/optimized/full/global-voices-full.48d09868c2.webp",
      "isPartOf": "EMNLP '23 (Poster)",
      "sameAs": [
        "https://aclanthology.org/2023.emnlp-main.981.pdf",
        "https://github.com/iamshnoo/weathub",
        "https://iamshnoo.github.io/global_voices_local_biases/"
      ]
    }
  ]
}
