<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1 20151215//EN" "http://jats.nlm.nih.gov/publishing/1.1/JATS-journalpublishing1.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" dtd-version="1.1" specific-use="sps-1.9" article-type="research-article" xml:lang="en">
    <front>
        <journal-meta>
            <journal-id journal-id-type="publisher-id">tinf</journal-id>
            <journal-title-group>
                <journal-title>Transinformação</journal-title>
                <abbrev-journal-title abbrev-type="publisher">Transinformação</abbrev-journal-title>
            </journal-title-group>
            <issn pub-type="ppub">0103-3786</issn>
            <issn pub-type="epub">2318-0889</issn>
            <publisher>
                <publisher-name>Pontifícia Universidade Católica de Campinas</publisher-name>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="other">00328</article-id>
            <article-id pub-id-type="doi">10.1590/2318-0889202537e2515501</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>ORIGINAL</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>A tool for bibliometric analysis of journals indexed in Google Scholar Metrics and OpenAlex</article-title>
                <trans-title-group xml:lang="pt">
                    <trans-title>Ferramenta para análise bibliométrica de periódicos indexados no Google Scholar Metrics e OpenAlex</trans-title>
                </trans-title-group>
            </title-group>
            <contrib-group>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid">0000-0002-8761-2178</contrib-id>
                    <name>
                        <surname>Gavron</surname>
                        <given-names>Edson Mário</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/contributor-roles/data-curation">Data Curation</role>
                    <role content-type="http://credit.niso.org/contributor-roles/investigation">Investigation</role>
                    <role content-type="http://credit.niso.org/contributor-roles/methodology">Methodology</role>
                    <role content-type="http://credit.niso.org/contributor-roles/writing-original-draft">Writing – Original Draft</role>
                    <role content-type="http://credit.niso.org/contributor-roles/writing-review-editing">Writing – Review &amp; Editing</role>
                    <xref ref-type="aff" rid="aff01">1</xref>
                    <xref ref-type="corresp" rid="c01"/>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid">0000-0002-4142-2061</contrib-id>
                    <name>
                        <surname>Pinto</surname>
                        <given-names>Adilson Luiz</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/contributor-roles/conceptualization">Conceptualization</role>
                    <role content-type="http://credit.niso.org/contributor-roles/methodology">Methodology</role>
                    <role content-type="http://credit.niso.org/contributor-roles/supervision">Supervision</role>
                    <role content-type="http://credit.niso.org/contributor-roles/writing-review-editing">Writing – Review &amp; Editing</role>
                    <xref ref-type="aff" rid="aff01">1</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid">0000-0002-8338-1931</contrib-id>
                    <name>
                        <surname>Canto</surname>
                        <given-names>Fábio Lorensi do</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/contributor-roles/supervision">Supervision</role>
                    <role content-type="http://credit.niso.org/contributor-roles/validation">Validation</role>
                    <role content-type="http://credit.niso.org/contributor-roles/writing-review-editing">Writing – Review &amp; Editing</role>
                    <xref ref-type="aff" rid="aff02">2</xref>
                </contrib>
                <contrib contrib-type="author">
                    <contrib-id contrib-id-type="orcid">0000-0001-7027-1023</contrib-id>
                    <name>
                        <surname>Talau</surname>
                        <given-names>Marcos</given-names>
                    </name>
                    <role content-type="http://credit.niso.org/contributor-roles/data-curation">Data Curation</role>
                    <role content-type="http://credit.niso.org/contributor-roles/software">Software</role>
                    <role content-type="http://credit.niso.org/contributor-roles/writing-review-editing">Writing – Review &amp; Editing</role>
                    <xref ref-type="aff" rid="aff03">3</xref>
                </contrib>
            </contrib-group>
            <aff id="aff01">
                <label>1</label>
                <institution content-type="orgname">Universidade Federal de Santa Catarina</institution>
                <institution content-type="orgdiv1">Centro de Ciências da Educação</institution>
                <institution content-type="orgdiv2">Programa de Pós-Graduação em Ciência da Informação</institution>
                <addr-line>
                    <city>Florianópolis</city>
                    <state>SC</state>
                </addr-line>
                <country country="BR">Brasil</country>
                <institution content-type="original">Universidade Federal de Santa Catarina, Centro de Ciências da Educação, Programa de Pós-Graduação em Ciência da Informação. Florianópolis, SC, Brasil.</institution>
            </aff>
            <aff id="aff02">
                <label>2</label>
                <institution content-type="orgname">Universidade Federal de Santa Catarina</institution>
                <institution content-type="orgdiv1">Biblioteca Universitária</institution>
                <addr-line>
                    <city>Florianópolis</city>
                    <state>SC</state>
                </addr-line>
                <country country="BR">Brasil</country>
                <institution content-type="original">Universidade Federal de Santa Catarina, Reitoria, Biblioteca Universitária. Florianópolis, SC, Brasil.</institution>
            </aff>
            <aff id="aff03">
                <label>3</label>
                <institution content-type="orgname">Universidade Tecnológica do Paraná</institution>
                <institution content-type="orgdiv1">Departamento Acadêmico de Eletrônica</institution>
                <addr-line>
                    <city>Curitiba</city>
                    <state>PR</state>
                </addr-line>
                <country country="BR">Brasil</country>
                <institution content-type="original">Universidade Tecnológica do Paraná, Campus Curitiba, Departamento Acadêmico de Eletrônica. Curitiba, PR, Brasil.</institution>
            </aff>
            <author-notes>
                <corresp id="c01">Correspondence to: E. M. Gavron. E-mail: <email>edson.gavron@ufsc.br</email>. </corresp>
                <fn fn-type="edited-by">
                    <label>Editor</label>
                    <p>Luisa Angélica Paraguai Donati</p>
                </fn>
                <fn fn-type="coi-statement">
                    <label>Conflict of interest</label>
                    <p>The authors declare that they have no conflicts of interest.</p>
                </fn>
            </author-notes>
            <pub-date publication-format="electronic" date-type="pub">
                <day>0</day>
                <month>0</month>
                <year>2025</year>
            </pub-date>
            <pub-date publication-format="electronic" date-type="collection">
                <year>2025</year>
            </pub-date>
            <volume>37</volume>
            <elocation-id>e2515501</elocation-id>
            <history>
                <date date-type="received">
                    <day>30</day>
                    <month>04</month>
                    <year>2025</year>
                </date>
                <date date-type="accepted">
                    <day>21</day>
                    <month>10</month>
                    <year>2025</year>
                </date>
            </history>
            <permissions>
                <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/" xml:lang="en">
                    <license-p>This is an Open Access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original work is properly cited.</license-p>
                </license>
            </permissions>
            <abstract>
                <title>Abstract</title>
                <p>Google Scholar Metrics is one of the leading free tools for assessing the impact of academic journals; however, it presents significant limitations, such as the absence of a master list of journals and restrictions on automated data extraction. To overcome these challenges, this study presents the development of the GSM-ALEX_data tool, aimed at the extraction, integration, and bibliometric analysis of journals indexed in both Google Scholar Metrics and the OpenAlex database. The methodology involved the construction of four Python scripts responsible for automating data collection from both platforms, matching records, and structuring an integrated database. The results indicate that GSM-ALEX_data is effective for conducting large-scale bibliometric analyses and providing editorial data, metrics, and publications in an integrated manner. The main originality of the proposal lies in overcoming the limitations of Google Scholar Metrics through its combination with open data. It is concluded that the tool represents a resource for researchers by integrating data from platforms such as Google Scholar Metrics and OpenAlex, thereby expanding the scope available for bibliometric analyses.</p>
            </abstract>
            <trans-abstract xml:lang="pt">
                <title>Resumo</title>
                <p>O Google Scholar Metrics é uma das principais ferramentas gratuitas para avaliação do impacto de periódicos acadêmicos, mas apresenta sérias limitações, como ausência de uma lista mestre de periódicos e restrições à extração automatizada de dados. Visando superar esses entraves, este estudo apresenta o desenvolvimento da ferramenta GSM-ALEX_data, voltada à extração, integração e análise bibliométrica de periódicos indexados no Google Scholar Metrics e na base OpenAlex. A metodologia envolveu a construção de quatro scripts em Python, responsáveis por automatizar a coleta de dados nas duas plataformas, realizar a correspondência entre os registros e estruturar um banco de dados integrado. Os resultados indicam que o GSM-ALEX_data é eficaz para realizar análises bibliométricas em larga escala, fornecendo dados editoriais, métricas e publicações de forma integrada. A principal originalidade da proposta está na superação das barreiras do Google Scholar Metrics por meio da combinação com dados abertos. Conclui-se que a ferramenta representa um recurso valioso para pesquisadores ao integrar dados de plataformas como o Google Scholar Metrics e o OpenAlex, ampliando o universo disponível para análises bibliométricas.</p>
            </trans-abstract>
            <kwd-group xml:lang="en">
                <title>Keywords</title>
                <kwd>Bibliometric tool</kwd>
                <kwd>Google Scholar Metrics</kwd>
                <kwd>Journal evaluation</kwd>
                <kwd>OpenAlex</kwd>
            </kwd-group>
            <kwd-group xml:lang="pt">
                <title>Palavras-chave</title>
                <kwd>Ferramenta bibliométrica</kwd>
                <kwd>Google Scholar Metrics</kwd>
                <kwd>Avaliação de periódicos</kwd>
                <kwd>OpenAlex</kwd>
            </kwd-group>
        </article-meta>
    </front>
    <body>
        <sec sec-type="intro">
            <title>Introduction</title>
            <p>Google Scholar Metrics (GSM) is a tool used to measure the impact of academic journals. Google Scholar (GS) launched the tool in 2012 and calculates its metrics using citations from all articles indexed in its database, including those not directly covered by GSM (<xref ref-type="bibr" rid="B08">Google Scholar, 2024</xref>). Its purpose is to assess the visibility and influence of recent articles by summarizing citations across a wide range of academic publications, thereby guiding authors in selecting where to publish their research. GSM employs the h-index as its core metric, applying a five-year time window and calculating the median h-index (<xref ref-type="bibr" rid="B08">Google Scholar, 2024</xref>).</p>
            <p>The GS is a search engine that indexes only academic documents. Its crawlers continuously scan university websites, publishers, repositories, databases, and other scholarly sources on the web, without thematic or linguistic restrictions (<xref ref-type="bibr" rid="B07">Delgado López-Cózar; Orduña-Malea; Martín-Martín, 2019</xref>). This automated indexing approach makes GSM a tool that minimizes selection bias common to commercial databases, as it includes journals from various countries and languages, often covering more regionally focused topics such as the humanities and social sciences (<xref ref-type="bibr" rid="B10">Jacsó, 2012</xref>; <xref ref-type="bibr" rid="B11">Leydesdorff; Wouters; Bornmann, 2016</xref>; <xref ref-type="bibr" rid="B16">Orduña-Malea; Delgado López-Cózar, 2014</xref>; <xref ref-type="bibr" rid="B21">Waltman, 2016</xref>).</p>
            <p>Another point to highlight is that GS is among the platforms with the broadest coverage of bibliometric data (<xref ref-type="bibr" rid="B12">Martín-Martín <italic>et al.</italic>, 2021</xref>). Moreover, due to its free access and user-friendly interface, its use has grown significantly within the academic community (<xref ref-type="bibr" rid="B01">Canto <italic>et al.</italic>, 2022</xref>). These characteristics have contributed to GSM’s popularity as an alternative for analyzing academic impact through citation metrics.</p>
            <p>However, there are several limitations associated with GSM. One of them is the inability to extract data directly from the system, which hinders broader studies on temporal coverage and prevents large-scale analyses (<xref ref-type="bibr" rid="B15">Orduña-Malea; Aytac; Tran, 2019</xref>). Another issue is the lack of transparency regarding the number of indexed journals or the absence of a master list (<xref ref-type="bibr" rid="B05">Delgado López-Cózar; Cabezas-Clavijo, 2012</xref>). Additionally, there is no verification mechanism for indexed content, which may lead to duplicate records or unjustified exclusion of journals that meet the indexing criteria (<xref ref-type="bibr" rid="B04">Costa; Canto; Pinto, 2020</xref>). The lack of <italic>International Standard Serial Number</italic> (ISSN) search functionality further complicates matters, as this identifier could address inconsistencies in record standardization (<xref ref-type="bibr" rid="B04">Costa; Canto; Pinto, 2020</xref>).</p>
            <p>Conducting large-scale studies using GSM data remains a challenge. Nonetheless, despite its limitations, GSM is still considered a promising tool for bibliometric research (<xref ref-type="bibr" rid="B01">Canto <italic>et al.</italic>, 2022</xref>; <xref ref-type="bibr" rid="B07">Delgado López-Cózar; Orduña-Malea; Martín-Martín, 2019</xref>).</p>
            <p>Since GSM does not provide a master list or an API, users must perform journal queries by title. However, this method introduces automation difficulties and requires manual resolution, especially in cases involving homonymous titles or duplicate indexing. These issues stem from the lack of additional disambiguating metadata beyond the journal title.</p>
            <p>The literature reports initiatives that use GSM for metric studies involving more than 1,000 journal titles, making manual data collection impractical or overly labor-intensive (<xref ref-type="bibr" rid="B01">Canto <italic>et al.</italic>, 2022</xref>; <xref ref-type="bibr" rid="B05">Delgado López-Cózar; Cabezas-Clavijo, 2012</xref>, <xref ref-type="bibr" rid="B06">2013</xref>; <xref ref-type="bibr" rid="B16">Orduña-Malea; Delgado López-Cózar, 2014</xref>; <xref ref-type="bibr" rid="B17">Pinto <italic>et al.</italic>, 2020</xref>).</p>
            <p>One solution to automate this process would be utilizing article data available in GSM lists. Until a few years ago, this approach was more complex to implement. However, automation has become feasible with the availability of complete bibliographic data on platforms like OpenAlex.</p>
            <p>OpenAlex is a platform that incorporates data from Microsoft Academic. It was created following Microsoft’s announcement of discontinuing its service in 2021 and has since established itself as a comprehensive source of bibliographic data (<xref ref-type="bibr" rid="B19">Scheidsteger; Haunschild, 2023</xref>; <xref ref-type="bibr" rid="B22">Zhang <italic>et al.</italic>, 2024</xref>).</p>
            <p><xref ref-type="bibr" rid="B12">Martín-Martín <italic>et al.</italic> (2021)</xref> conducted a study on the coverage of highly cited articles across 252 subject categories. The researchers investigated whether Web of Science, Scopus, Dimensions, OpenCitations, Microsoft Academic, and Google Scholar indexed the articles. Microsoft Academic was identified as the second most comprehensive database, behind only GS.</p>
            <p>OpenAlex demonstrates broad coverage across scholarly content, data interoperability through persistent identifiers such as Digital Object Identifier (DOI), open access, and a field structure organized in multiple levels of granularity. These features enable detailed analyses of subfields and support tools such as APIs and snapshots for fast and efficient data access (<xref ref-type="bibr" rid="B09">Hao <italic>et al.</italic>, 2022</xref>; <xref ref-type="bibr" rid="B13">Mongeon; Bowman; Costas, 2023</xref>; <xref ref-type="bibr" rid="B14">Okamura, 2023</xref>). As such, OpenAlex stands out as a robust alternative, particularly for recent publications and interdisciplinary analyses. It offers significant advantages, including open access and integration with multiple sources, demonstrating great potential for research reporting and bibliometric analysis (<xref ref-type="bibr" rid="B18">Rodrigues; Lopes; Batista, 2023</xref>; <xref ref-type="bibr" rid="B19">Scheidsteger; Haunschild, 2023</xref>; <xref ref-type="bibr" rid="B20">Schnieders <italic>et al.</italic>, 2022</xref>).</p>
            <p>In this context, a feasible alternative for mitigating GSM’s limitations in large-scale studies is to use open bibliographic data, as it enables the integration of GSM information and facilitates enriched analysis. For this reason, OpenAlex emerges as a viable alternative, offering open access to an extensive catalog of scientific articles, authors, and institutions, along with the possibility of downloading the entire database.</p>
            <p>Accordingly, this study aimed to develop a tool to extract data from GSM using OpenAlex as a base, allowing information integration and more detailed bibliometric analysis. This is an extension of the work presented in <xref ref-type="bibr" rid="B02">Canto <italic>et al.</italic> (2024)</xref>, expanding the automation of the GSM data extraction and analysis process and, above all, adding a validation stage for the results based on OpenAlex data.</p>
        </sec>
        <sec sec-type="methods">
            <title>Methodological Procedures</title>
            <p>This study proposes the implementation of automated routines for extracting bibliographic and bibliometric data from the GSM and OpenAlex platforms. To develop these routines, the researchers used the Python 3 programming language, along with libraries such as CSV, JSON, OS, GLOB, GZIP, SYS, Selenium, Datetime, Argparse, Random, Difflib, Natural Language Toolkit (NLTK), Unidecode, DuckDB, and Strftime. Together, these libraries enabled the implementation of the scripts.</p>
            <p>To support the development of the routines, the researchers utilized a validated dataset, publicly available in the Zenodo research data repository. This dataset, produced by <xref ref-type="bibr" rid="B01">Canto <italic>et al.</italic> (2022)</xref>, comprises information on scientific journals from Latin American and Caribbean countries extracted from GSM. It served as the foundation for executing the automated routines, thereby enabling systematic monitoring and validation of the scripts’ accuracy.</p>
            <p>The researchers designed the script development to maximize the collection of GSM data, aiming to conduct a comprehensive survey of the content indexed on the platform. The objective was to map, as completely as possible, the journals with bibliometric data available in GSM, enabling large-scale analysis. Some process adjustments were necessary, requiring specific approaches, such as working with a predefined list of journals.</p>
            <p>This study chose a complete download of the OpenAlex dataset, even though the platform offers tools that simplify data collection and analysis, including a suite of APIs that allow querying its information base as needed. With the complete OpenAlex database available locally, data processing became significantly faster than API-based access, which is subject to request limitations. The data is compressed and distributed across six directories: Source (Journals, Proceedings, etc.), Work (Articles, Books, etc.), Author, Institution, Concept (Indexing Terms), and Publisher. However, the total size of the uncompressed files easily exceeds 3 TB, making it unfeasible to use on computers with limited storage capacity. Therefore, the researchers implemented strategies to process the data in smaller parts.</p>
            <p>Another source of data collection was GSM, which does not offer tools for data extraction. Consequently, scraping data from the input list one journal title at a time was necessary. The alternative adopted for large-scale queries was the use of URLs, allowing for the automation of searches by inserting the journal titles into the search structure. The researchers retrieved the data by accessing the HTML structure and applying field-location techniques to scrape the desired information.</p>
            <p>While developing the GSM URL-based data extraction, the researchers observed that Google monitors the volume of requests made to its page. When the system detects excessive access, it activates restriction mechanisms, such as requiring a CAPTCHA to verify user authenticity. If the requests persist, the system may temporarily block the IP address. The study also found that IPs associated with academic institutions, especially when combined with request time randomization, tended to be tolerated and were not blocked by the server. Therefore, to circumvent these limitations, the script was configured to acquire data at randomized intervals between 10 and 30 seconds, using IP addresses from educational institutions.</p>
        </sec>
        <sec>
            <title>Tool structure</title>
            <p>The data extraction tool developed consists of four main scripts: GSM_search, OpenAlex_Source, OpenAlex_Works, and GSM-ALEX_merge, collectively referred to as GSM-ALEX_data. The GSM_search script was designed to perform searches based on a list of journal titles, while OpenAlex_Source collects information related to the journals. In turn, OpenAlex_Works is responsible for extracting article data, and the GSM-ALEX_merge script performs verification and matching between GSM and OpenAlex records, ensuring integration between the two datasets.</p>
            <p>It is important to highlight that the researchers developed the GSM_search script based on the GSM_hdata script by <xref ref-type="bibr" rid="B02">Canto <italic>et al.</italic> (2024)</xref>, specifically its GSM_hsearch section. Although both scripts perform similar tasks, they were designed with distinct functionalities to meet specific requirements and incorporate several improvements for enhanced automation. The team also developed the remaining three scripts to manage the data acquisition processes between the two sources, GSM and OpenAlex (<xref ref-type="fig" rid="f01">Figure 1</xref>).</p>
            <fig id="f01">
                <label>Figure 1</label>
                <caption>
                    <title>Flowchart of GSM-ALEX_data.</title>
                </caption>
                <graphic xlink:href="2318-0889-tinf-37-e2515501-gf01.jpg"/>
                <attrib>Source: Prepared by the authors (2025).</attrib>
            </fig>
            <p>The data integration stage (data merge) of the flowchart was structured to ensure consistency between the sets of information. Using a script, the fields illustrated in <xref ref-type="fig" rid="f03">Figure 3</xref> were collected and saved in a spreadsheet-compatible format. This file was then combined with the output file from OpenAlex_Source, using the journal’s unique ID as the connection key. In this way, it was possible to generate a consolidated spreadsheet that brings together the data shown in <xref ref-type="fig" rid="f03">Figure 3</xref> and the fields detailed in <xref ref-type="table" rid="t01">Table 1</xref>.</p>
            <fig id="f03">
                <label>Figure 3</label>
                <caption>
                    <title>Example of TXT file format generated by GSM-ALEX_merge.</title>
                </caption>
                <graphic xlink:href="2318-0889-tinf-37-e2515501-gf03.jpg"/>
                <attrib>Source: Prepared by the authors (2025).</attrib>
            </fig>
            <table-wrap id="t01">
                <label>Table 1</label>
                <caption>
                    <title>Fields extracted from journals in OpenAlex, Brazil, 2025.</title>
                </caption>
                <table frame="hsides" rules="groups">
                    <thead>
                        <tr align="center">
                            <th align="left">Field</th>
                            <th>Description</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr align="center">
                            <td align="left">ID</td>
                            <td>Unique identifier link for each journal</td>
                        </tr>
                        <tr align="center">
                            <td align="left">issn_l</td>
                            <td>Journal ISSN</td>
                        </tr>
                        <tr align="center">
                            <td align="left">Issn</td>
                            <td>When the journal has more than one ISSN, they are recorded in this field</td>
                        </tr>
                        <tr align="center">
                            <td align="left">display_name</td>
                            <td>Journal title</td>
                        </tr>
                        <tr align="center">
                            <td align="left">Publisher</td>
                            <td>Publisher</td>
                        </tr>
                        <tr align="center">
                            <td align="left">is_oa</td>
                            <td>Whether the journal is open-access</td>
                        </tr>
                        <tr align="center">
                            <td align="left">is_in_doaj</td>
                            <td>Whether it is indexed in DOAJ</td>
                        </tr>
                        <tr align="center">
                            <td align="left">Type</td>
                            <td>Source type (Journal, Proceedings, etc.)</td>
                        </tr>
                        <tr align="center">
                            <td align="left">country_code</td>
                            <td>Country ISO code</td>
                        </tr>
                        <tr align="center">
                            <td align="left">alternate_titles</td>
                            <td>Title variations</td>
                        </tr>
                        <tr align="center">
                            <td align="left">abbreviated_title</td>
                            <td>Title abbreviations</td>
                        </tr>
                        <tr align="center">
                            <td align="left">works_count</td>
                            <td>Total publications</td>
                        </tr>
                        <tr align="center">
                            <td align="left">cited_by_count</td>
                            <td>Total number of citations for the journal’s articles hosted in OpenAlex</td>
                        </tr>
                        <tr align="center">
                            <td align="left">h_index</td>
                            <td>Total h-index</td>
                        </tr>
                        <tr align="center">
                            <td align="left">i10_index</td>
                            <td>i10-index (last 10 years)</td>
                        </tr>
                        <tr align="center">
                            <td align="left">x_concepts_display_name</td>
                            <td>Concepts assigned to the journal based on the concepts of its articles</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <attrib>Source: Prepared by the authors (2025).</attrib>
                    <fn>
                        <p>Note: DOAJ: Directory of Open Access Journals; ISSN: International Standard Serial Number.</p>
                    </fn>
                </table-wrap-foot>
            </table-wrap>
        </sec>
        <sec>
            <title>GSM_search</title>
            <p>GSM_search is the first component of GSM-ALEX_data and is responsible for collecting essential information from GSM, including the journal title, h5-index, h5-median, and the corresponding URL. It also identifies up to three articles comprising the journal’s h5-index. Fewer articles will be returned if the h5-index is lower than three.</p>
            <p>The process begins when the script sends a request through the GSM URL, inserting the journal’s title into a specific part of the link. For example, if the title is ‘The Journal of Finance’, the script incorporates this term into the URL to perform the search. The platform may then return a list of data displayed in an HTML table. If it finds no results, it generates an empty table.</p>
            <p>URL: <ext-link ext-link-type="uri" xlink:href="http://scholar.google.com/citations?hl=en&amp;view_op=search_venues&amp;vq=the+journal+of+finance&amp;btnG=">scholar.google.com/citations?hl=en&amp;view_op=search_venues&amp;vq=the+journal+of+finance&amp;btnG=</ext-link></p>
            <p>The script received a list of journal titles as input, which could appear in different formats, for instance, with parentheses containing the journal’s location or other variations. To address this, the researchers established criteria to adjust the titles in cases where the search returned an empty table.</p>
            <p>Initially, the script preserved the title in its original form, except when it consisted of a single word. In such cases, it added double quotation marks to improve search precision. If the search still returned an empty table, the script applied additional strategies. First, it removed numbers enclosed in parentheses. If that proved ineffective, it excluded the entire content within parentheses. When this exclusion reduced the title to a single word, the script enclosed it in double quotation marks to enhance the search’s effectiveness and increase the likelihood of retrieving a valid result.</p>
            <p>This title-processing logic was implemented to enhance the accuracy of search results in specific situations while preserving the content within parentheses when necessary. However, this process increases the time required for data collection, as a single title may need to be queried multiple times in GSM.</p>
            <p>Once a response with data was obtained, GSM_search performed a comparison to verify whether the input text matched the data in the returned table. This verification was based on four criteria, called Term Match (TM), which evaluate the similarity between the input terms and the results returned by GSM. When a match was confirmed, the system created a file for each title and recorded the TM criterion used for the comparison. This process allowed the identification of which TM produced the highest number of matches between the input title and the GSM results.</p>
            <p>The script applied the first criterion based on the percentage of textual similarity. If the similarity reached 90% or higher, it recorded the data under this criterion. It calculated the percentage using the average number of characters in the analyzed titles – approximately 32 characters – with a variation of about three characters. Titles shorter than the average tended to produce higher comparison accuracy, while longer titles could inflate the count and reduce precision.</p>
            <p>Nevertheless, this ±3 character variation proved to provide high accuracy in text comparisons and increased match rates. A test requiring 100% equality between texts was too sensitive to slight variations such as punctuation, accents, or formatting, which often resulted in missed matches. Therefore, the script prioritized the 90% threshold, as it offered greater reliability than stricter criteria.</p>
            <p>The second criterion incorporated Natural Language Processing (NLP) techniques using the NLTK library to address variations in journal title formatting. The goal was to accommodate common differences such as: (a) Abbreviations: e.g., “Rev.” instead of “Revista”; (b) Term substitutions: use of symbols like “&amp;” instead of “AND”; (c) Omission or inclusion of auxiliary words: such as articles and pronouns, which might be inconsistently written or omitted, and (d) Spelling variations: slight differences in spelling or writing style.</p>
            <p>This approach eliminated the need to manually define a stopword list, as the NLP model could automatically detect patterns and interpret context without prior manual filtering.</p>
            <p>The third criterion was introduced to handle cases where the journal title list included subtitles, but the GSM result table did not. This criterion relied on identifying the shortest part of the text and checking whether it was contained in either the input title or the returned result. The process verified whether text A was included in text B or the other way around, thus addressing challenges related to subtitles.</p>
            <p>Finally, the fourth criterion was designed to compare only the letters, disregarding white spaces. This solution addressed situations where the input title contained misspellings or merged two words without the usual spacing.</p>
            <p>The script organized the results as follows: when it found a match in GSM, it saved the collected data in individual .txt files, each with a unique name, within a directory named ‘saved’ (<xref ref-type="fig" rid="f02">Figure 2</xref>).</p>
            <fig id="f02">
                <label>Figure 2</label>
                <caption>
                    <title>Example of TXT file format from GSM_search.</title>
                </caption>
                <graphic xlink:href="2318-0889-tinf-37-e2515501-gf02.jpg"/>
                <attrib>Source: Prepared by the authors (2025).</attrib>
            </fig>
            <p>In cases where the search returned results that did not meet the established criteria, the script grouped those occurrences into a single file named NOT-match-items-in-table. Conversely, when the search returned no results at all, the script recorded them separately in a specific file named NOT_empty-table.</p>
            <p>Applying these criteria allowed maximizing the number of matches identified during the GSM search process. This approach’s main objective was to identify as many GSM-listed journal titles as possible that bore some relation to the input titles, thereby broadening the scope of data collection.</p>
        </sec>
        <sec>
            <title>OpenAlex_Source</title>
            <p>The second procedure developed, OpenAlex_Source, retrieved relevant information for the bibliometric analysis of journals from OpenAlex, enabling the creation of a dataset that complements the information about journals found in GSM. This process resulted in enhanced datasets, allowing for more rigorous bibliometric analysis of the journals.</p>
            <p>The information collected included unique identifiers for each journal, such as ISSN and OpenAlex ID, editorial details, title variations, countries, quantitative data such as the number of published articles, bibliometric indicators, and subject classifications of the journals. All the data collected for each journal are listed in <xref ref-type="table" rid="t01">Table 1</xref>.</p>
            <p>Data collection was carried out using a Python script. The script processes the JSON files found in the <italic>Source</italic> directory, opens them individually, extracts data from the previously identified fields, and then compiles a single CSV file containing the information listed in <xref ref-type="table" rid="t01">Table 1</xref> for each journal. It is important to note that more fields are available within the OpenAlex JSON files; however, the selection of fields was based on the specific interests of this research.</p>
        </sec>
        <sec>
            <title>OpenAlex_Works</title>
            <p>The third stage involved the development of a script called OpenAlex_Works. This script extracts information about journal articles. A filter was applied to collect only data from journals published in the last five years (2018–2023), the same time frame used by GSM to compute its h5-index. All journals indexed in OpenAlex published during this period were selected to compose the dataset. This process was developed to cross-reference data extracted from GSM with that from OpenAlex.</p>
            <p>There are numerous fields available to describe each article. Therefore, a subset of relevant fields was selected to meet the system’s functional requirements. The chosen fields included the article’s ID, title, and source type (i.e., the journal in which the article was published). Regarding the article metadata, the fields collected included: title, authorship, document type, and citation metrics. The selected fields are listed in <xref ref-type="table" rid="t02">Table 2</xref>.</p>
            <table-wrap id="t02">
                <label>Table 2</label>
                <caption>
                    <title>Fields extracted from OpenAlex articles, Brazil, 2025.</title>
                </caption>
                <table frame="hsides" rules="groups">
                    <thead>
                        <tr align="center">
                            <th align="left">Field</th>
                            <th>Description</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr align="center">
                            <td align="left">title</td>
                            <td>Article title</td>
                        </tr>
                        <tr align="center">
                            <td align="left">publication_year</td>
                            <td>Year of publication</td>
                        </tr>
                        <tr align="center">
                            <td align="left">primary_location_id</td>
                            <td>Journal identifier number</td>
                        </tr>
                        <tr align="center">
                            <td align="left">primary_location_type</td>
                            <td>Source type</td>
                        </tr>
                        <tr align="center">
                            <td align="left">primary_location_display_name</td>
                            <td>Journal name</td>
                        </tr>
                        <tr align="center">
                            <td align="left">type</td>
                            <td>Document type</td>
                        </tr>
                        <tr align="center">
                            <td align="left">authors_names</td>
                            <td>Author names</td>
                        </tr>
                        <tr align="center">
                            <td align="left">cited_by_count</td>
                            <td>Total number of citations</td>
                        </tr>
                        <tr align="center">
                            <td align="left">2yr_cited_by_count</td>
                            <td>Number of citations in the last 2 years</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <attrib>Source: Prepared by the authors (2025).</attrib>
                </table-wrap-foot>
            </table-wrap>
            <p>The scraping process for OpenAlex_Works differed from that of OpenAlex_Source. Due to insufficient storage capacity, it was impossible to decompress the entire <italic>Works</italic> directory simultaneously. Therefore, the procedure was carried out in stages. After each processing stage, the corresponding file was deleted to free up space and allow the process to continue.</p>
            <p>To run the OpenAlex_Works script, a computer equipped with 80 GB of RAM and 20 Intel® Xeon® CPU E5-2420 processors, operating at 1.90 GHz, was used. This configuration enabled the entire procedure to be executed simultaneously without dividing the workload across machines with less memory capacity.</p>
            <p>The procedures involving OpenAlex_Source and OpenAlex_Works generated a dataset for in-depth analysis of journals indexed in GSM. The step-by-step, systematized extraction process overcame storage and processing limitations, resulting in CSV files containing the original OpenAlex fields. These files facilitate data integration and future analyses. This dataset complements GSM data and allows for more detailed bibliometric analyses of journals by research area, country, and metrics from both data sources.</p>
        </sec>
        <sec>
            <title>GSM-ALEX_merge</title>
            <p>After completing the three previous procedures, combining the datasets and promoting integration between the source data from OpenAlex and GSM becomes possible. The main goal of GSM-ALEX_merge is to compare data extracted from GSM with the information gathered from OpenAlex_Works, verifying their compatibility. The procedure aimed to identify matches between article titles collected from GSM and those indexed in OpenAlex, ensuring the integrity and reliability of the data analyzed.</p>
            <p>For this comparison, the DuckDB library was used, enabling an efficient database for cross-referencing information. The matching process was based on article titles, comparing those found in GSM with those from the OpenAlex_Works dataset, ensuring more accurate data integration.</p>
            <p>The OpenAlex_Works script was also processed using the same computer with 80 GB of RAM and 20 Intel® Xeon® CPU E5-2420 processors at 1.90 GHz.</p>
            <p>The GSM_search script generated GSM data by creating individual files. The script then opened these files and checked for exact title matches in the OpenAlex_Works dataset. When it found a matching title, it created and saved a new file with a unique name composed of the date, time, and part of the journal title. This approach ensures that each generated file has a distinct identifier in .txt format. The output file included information about the articles that presented matches. This data comprises the article title, journal ID, the filename generated by GSM_search, and the associated information in that file.</p>
            <p>This structured format facilitates the identification and analysis of compatible data between GSM and OpenAlex_Works, allowing for efficient and organized verification.</p>
            <p>The illustration highlights the separation between two generated files: File 1, which contains the data obtained during the GSM_search stage, and File 2, where the articles with matches found in OpenAlex are stored.</p>
            <p>In the illustrated example, two matching articles were identified. In such cases, File 2 records the journal ID, the matched article titles, and the related metric data. This organization facilitates the analysis of cross-referenced information.</p>
            <p>It is also evident in the illustration that the journal title from GSM (File 1) differs from the title in OpenAlex. This occurred in cases where the journal underwent a title change, demonstrating that the system could handle complex situations such as title discontinuities or replacements.</p>
            <p>To improve organization and facilitate analysis, the script created specific files for different scenarios: cases where no article title found a match, cases where at least one article produced a match, and cases where a single article title was linked to more than one journal. In situations with multiple matches, the script also verified whether the journal title from the TXT file matched any of the identified journals. This structure simplified the review process and helped identify potential failures (<xref ref-type="fig" rid="f03">Figure 3</xref>).</p>
        </sec>
        <sec>
            <title>Tool Validation</title>
            <p>The first stage aimed to identify journal titles from a predefined list by checking for matches in GSM using the developed procedure. A list of 688 journal titles was selected, and similarity criteria were established to evaluate textual correspondence. As the script processed the list, titles not found in GSM were manually reviewed, and the script was adjusted to improve identification.</p>
            <p>This phase was essential to detect possible flaws and correct the program logic. The list included journals already indexed in GSM and some that were not, providing a test base for verifying the criteria described in GSM_search.</p>
            <p>Next, the program was tested with a larger dataset to validate the procedures. This new test involved 3,070 journals hosted in the Zenodo data repository. The list originated from a study on Latin American journals indexed in GSM (<xref ref-type="bibr" rid="B03">Canto <italic>et al.</italic>, 2021</xref>). The Zenodo dataset is in spreadsheet format and contains editorial information about these journals, including their titles, as listed in both GSM and Latindex.</p>
            <p>Since the validation data refers to 2021, and some journals may have fallen outside GSM’s inclusion criteria, updated data had to be collected following the same procedures. Thus, the researchers processed the list of 3,070 journals twice: first using the Gsm_hdata tool by <xref ref-type="bibr" rid="B02">Canto <italic>et al.</italic> (2024)</xref> and then using the GSM-ALEX_data tool. This process and its results are illustrated in <xref ref-type="fig" rid="f04">Figure 4</xref>.</p>
            <fig id="f04">
                <label>Figure 4</label>
                <caption>
                    <title>Workflow of the GSM data collection procedure results.</title>
                </caption>
                <graphic xlink:href="2318-0889-tinf-37-e2515501-gf04.jpg"/>
                <attrib>Source: Prepared by the authors (2025).</attrib>
            </fig>
            <p>Using Gsm_hdata, 2,302 journals (75%) were found among the 3,070 titles. It is important to emphasize that this procedure used journal URLs instead of titles to extract data from GSM, which made the list more accurate and eliminated issues related to homonymous journal titles.</p>
            <p>The GSM-ALEX_data procedure returned a positive response for 2,752 (90%) of the 3,070 titles searched. Among the 318 unmatched titles, 309 returned no results in the GSM query, possibly because they were no longer indexed. Therefore, GSM-ALEX_data showed an improvement of 450 additional matched titles (15%) compared to Gsm_hdata.</p>
            <p>To check whether the titles matched between both procedures, the researchers compared the URLs of each journal retrieved from GSM using Python. Among the 2,752 titles found by GSM_search, 2,260 had the same URL as those found via Gsm_hdata. Based on this test dataset, the system achieved a 98% accuracy rate. It is also important to note that part of the dataset from <xref ref-type="bibr" rid="B03">Canto <italic>et al.</italic> (2021)</xref> was collected manually, whereas GSM_search is fully automated.</p>
            <p>Another procedure in the system involves verifying the output obtained through GSM_search, serving as a mechanism to ensure that the journal located in GSM is indeed the intended one. This is done by comparing the titles of the articles. Thus, the 2,752 journal titles were submitted to the GSM-ALEX_merge procedure, resulting in 2,421 journals with one or more equivalent article titles between OpenAlex and GSM. This outcome demonstrates that the combination of the GSM_search and GSM-ALEX_merge procedures yielded a slight performance improvement, surpassing the 2,302 journals identified by Gsm_hdata. <xref ref-type="table" rid="t03">Table 3</xref> presents the distribution of the textual comparison criteria (TM) applied during the similarity verification process between the input titles and the records retrieved in GSM.</p>
            <table-wrap id="t03">
                <label>Table 3</label>
                <caption>
                    <title>Distribution of Term Match criteria used for textual comparison, Brazil, 2025.</title>
                </caption>
                <table frame="hsides" rules="groups">
                    <thead>
                        <tr align="center">
                            <th rowspan="2" align="left">Criteria</th>
                            <th colspan="2">GSM_search</th>
                            <th rowspan="2">  </th>
                            <th colspan="2">GSM-ALEX_merge</th>
                        </tr>
                        <tr align="center" style="border-top-width:thin;border-top-style:solid">
                            <th><italic>n</italic></th>
                            <th>%</th>
                            <th><italic>n</italic></th>
                            <th>%</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr align="center">
                            <td align="left">TM1</td>
                            <td>2595</td>
                            <td>94.40</td>
                            <td> </td>
                            <td>2285</td>
                            <td>94.34</td>
                        </tr>
                        <tr align="center">
                            <td align="left">TM2</td>
                            <td>150</td>
                            <td>5.45</td>
                            <td> </td>
                            <td>132</td>
                            <td>5.45</td>
                        </tr>
                        <tr align="center">
                            <td align="left">TM3</td>
                            <td>4</td>
                            <td>0.15</td>
                            <td> </td>
                            <td>4</td>
                            <td>0.17</td>
                        </tr>
                        <tr align="center" style="border-top-width:thin;border-top-style:solid">
                            <td align="left">Total</td>
                            <td>2752</td>
                            <td>100</td>
                            <td> </td>
                            <td>2421</td>
                            <td>100</td>
                        </tr>
                    </tbody>
                </table>
                <table-wrap-foot>
                    <attrib>Source: Prepared by the authors (2025).</attrib>
                    <fn>
                        <p>Note: TM: Term Match.</p>
                    </fn>
                </table-wrap-foot>
            </table-wrap>
            <p>It can be observed that TM1 accounted for the majority of cases, totaling 2,595 journals, which represents 94.40% of the total. TM2 corresponded to 5.45% (150 journals). As expected, TM3 and TM4 showed a low incidence (0.15% or none), since they are applied hierarchically: records that do not meet the TM1 and TM2 criteria are reassessed using TM3 and TM4 as a final validation attempt. Consequently, the percentages remain practically mirrored for GSM-ALEX_merge. These results indicate that most matches between input titles and records retrieved in GSM occurred with a high degree of textual similarity, above 90%.</p>
        </sec>
        <sec sec-type="conclusions">
            <title>Final Considerations</title>
            <p>The tool presented in this study overcomes one of the main limitations of GSM: the absence of a master list containing all indexed journals. This is made possible by automating queries and data collection from GSM, enabling large-scale bibliometric analyses.</p>
            <p>Another relevant aspect is that GSM does not provide editorial information about the journals. With GSM-ALEX_data, linking editorial data to journals indexed in GSM via a spreadsheet becomes possible. Furthermore, the tool enables the incorporation of bibliometric indicators from OpenAlex, such as the h-index, total number of citations, and number of published articles, allowing for a more in-depth analysis of the journals.</p>
            <p>Among the limitations identified is the time required to extract data from large lists, especially when dealing with more than 100,000 titles. Since the tool must simulate human behavior, data collection can take over a month on a single machine; however, this limitation can be easily overcome by using multiple machines. It is important to note that the research used a dataset of 3,070 titles, covering the entire Latin America and Caribbean region, which was collected in no more than two days.</p>
            <p>Since it is a tool that performs scraping, a common limitation in this context is its reliance on the maintenance of consistent patterns in the page structure and security settings. Should any of these patterns be changed, the system will need to be adapted to ensure the continuity of the process.</p>
            <p>Another limitation is the volume of data, which demands a computer with compatible RAM and storage capacity, as used in this research. Additionally, when certain information is missing in OpenAlex, such as details about editors or the country of publication, the ability to conduct specific analyses for those journals may be affected.</p>
            <p>In light of these limitations, an important gap is identified for future studies, such as adapting the procedures to be carried out via the OpenAlex API, which would allow the process to be applied even on machines with more modest processing capacity, as well as enabling a comparison of these results with those obtained in the present research.</p>
            <p>Furthermore, since some data inconsistencies were observed during the procedures, it is essential to conduct a more detailed empirical study to assess the extent to which these inconsistencies may affect the quality of a bibliometric analysis.</p>
            <p>The researchers identified improvements to be made to GSM-ALEX_data, such as developing a user-friendly interface for the system, which currently runs without one and therefore limits its use by a wider group of researchers. They also plan to add a criterion that will allow duplicate records to be identified in advance, making it easier to clean the data more efficiently.</p>
            <p>Despite these limitations, GSM-ALEX_data proved to be an effective tool for extracting data from GSM and OpenAlex, combining information from both sources and enabling detailed bibliometric analyses of the extracted datasets.</p>
        </sec>
    </body>
    <back>
        <fn-group>
            <fn fn-type="other">
                <p>Article based on the thesis of E. M. GAVRON, entitled “<italic>Google Scholar Metrics e OpenAlex: construção de indicadores para medir impacto e visibilidade de periódicos</italic>”. Universidade Federal de Santa Catarina, 2025.</p>
            </fn>
            <fn fn-type="other">
                <label>How to cite this article:</label>
                <p>Gavron, E. M. <italic>et al</italic>. A tool for bibliometric analysis of journals indexed in Google Scholar Metrics and OpenAlex. <italic>Transinformação</italic>, v. 37, e2515501, 2025. <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1590/2318-0889202537e2515501">https://doi.org/10.1590/2318-0889202537e2515501</ext-link>.</p>
            </fn>
        </fn-group>
        <sec sec-type="data-availability" specific-use="data-available-upon-request">
            <title>Data Availability</title>
            <p>The research data are available on request from the corresponding author.</p>
        </sec>
        <ref-list>
            <title>References</title>
            <ref id="B01">
                <mixed-citation>Canto, F. L. <italic>et al.</italic> Latin American and Caribbean journals indexed in Google Scholar Metrics. <italic>Scientometrics</italic>, v. 127, n. 2, p. 763-783, 2022. Doi: https://doi.org/10.1007/s11192-021-04237-x.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Canto</surname>
                            <given-names>F. L.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Latin American and Caribbean journals indexed in Google Scholar Metrics</article-title>
                    <source>Scientometrics</source>
                    <volume>127</volume>
                    <issue>2</issue>
                    <fpage>763</fpage>
                    <lpage>783</lpage>
                    <year>2022</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-021-04237-x</pub-id>
                </element-citation>
            </ref>
            <ref id="B02">
                <mixed-citation>Canto, F. L. <italic>et al.</italic> Gsm_hdata: a bibliometric tool to analyze data from Google Scholar Metrics. <italic>Mobile Networks and Applications</italic>, v. 29, p. 754-761, 2024. Doi: https://doi.org/10.1007/s11036-023-02258-9.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Canto</surname>
                            <given-names>F. L.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Gsm_hdata: a bibliometric tool to analyze data from Google Scholar Metrics</article-title>
                    <source>Mobile Networks and Applications</source>
                    <volume>29</volume>
                    <fpage>754</fpage>
                    <lpage>761</lpage>
                    <year>2024</year>
                    <pub-id pub-id-type="doi">10.1007/s11036-023-02258-9</pub-id>
                </element-citation>
            </ref>
            <ref id="B03">
                <mixed-citation>Canto, F. L. <italic>et al.</italic> Latin American and Caribbean journals indexed in Google Scholar Metrics. [Data set]. <italic>Zenodo</italic>, 2021. Versão 2. Doi: https://doi.org/10.5281/zenodo.5704895.</mixed-citation>
                <element-citation publication-type="webpage">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Canto</surname>
                            <given-names>F. L.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <source>Latin American and Caribbean journals indexed in Google Scholar Metrics</source>
                    <comment>Data set</comment>
                    <publisher-name>Zenodo</publisher-name>
                    <year>2021</year>
                    <comment>Versão 2</comment>
                    <pub-id pub-id-type="doi">10.5281/zenodo.5704895</pub-id>
                </element-citation>
            </ref>
            <ref id="B04">
                <mixed-citation>Costa, H.; Canto, F. L.; Pinto, A. L. Google Scholar Metrics e a proposta do novo Qualis: impacto dos periódicos brasileiros de Ciência da Informação. <italic>Informação &amp; Sociedade</italic>, v. 30, n. 1, 2020. Doi: https://doi.org/10.1007/s11036-023-02258-9.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Costa</surname>
                            <given-names>H.</given-names>
                        </name>
                        <name>
                            <surname>Canto</surname>
                            <given-names>F. L.</given-names>
                        </name>
                        <name>
                            <surname>Pinto</surname>
                            <given-names>A. L.</given-names>
                        </name>
                    </person-group>
                    <article-title>Google Scholar Metrics e a proposta do novo Qualis: impacto dos periódicos brasileiros de Ciência da Informação</article-title>
                    <source>Informação &amp; Sociedade</source>
                    <volume>30</volume>
                    <issue>1</issue>
                    <year>2020</year>
                    <pub-id pub-id-type="doi">10.1007/s11036-023-02258-9</pub-id>
                </element-citation>
            </ref>
            <ref id="B05">
                <mixed-citation>Delgado López-Cózar, E.; Cabezas-Clavijo, A. Google Scholar Metrics: an unreliable tool for assessing scientific journals. <italic>El Profesional de la Información</italic>, v. 21, n. 4, p. 419-427, 2012. Available from: https://recyt.fecyt.es/index.php/EPI/article/view/epi.2012.jul.15. Cited: Dec. 10, 2024.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Delgado López-Cózar</surname>
                            <given-names>E.</given-names>
                        </name>
                        <name>
                            <surname>Cabezas-Clavijo</surname>
                            <given-names>A.</given-names>
                        </name>
                    </person-group>
                    <article-title>Google Scholar Metrics: an unreliable tool for assessing scientific journals</article-title>
                    <source>El Profesional de la Información</source>
                    <volume>21</volume>
                    <issue>4</issue>
                    <fpage>419</fpage>
                    <lpage>427</lpage>
                    <year>2012</year>
                    <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://recyt.fecyt.es/index.php/EPI/article/view/epi.2012.jul.15">https://recyt.fecyt.es/index.php/EPI/article/view/epi.2012.jul.15</ext-link></comment>
                    <date-in-citation content-type="access-date">Dec. 10, 2024</date-in-citation>
                </element-citation>
            </ref>
            <ref id="B06">
                <mixed-citation>Delgado López-Cózar, E.; Cabezas-Clavijo, A. Ranking journals: could Google scholar metrics be an alternative to journal citation reports and Scimago journal rank?. <italic>Learned publishing</italic>, v. 26, n. 2, p. 101-114, 2013. Doi: http://doi.wiley.com/10.1087/20130206.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Delgado López-Cózar</surname>
                            <given-names>E.</given-names>
                        </name>
                        <name>
                            <surname>Cabezas-Clavijo</surname>
                            <given-names>A.</given-names>
                        </name>
                    </person-group>
                    <article-title>Ranking journals: could Google scholar metrics be an alternative to journal citation reports and Scimago journal rank?</article-title>
                    <source>Learned publishing</source>
                    <volume>26</volume>
                    <issue>2</issue>
                    <fpage>101</fpage>
                    <lpage>114</lpage>
                    <year>2013</year>
                    <pub-id pub-id-type="doi">10.1087/20130206</pub-id>
                </element-citation>
            </ref>
            <ref id="B07">
                <mixed-citation>Delgado López-Cózar, E.; Orduña-Malea, E.; Martín-Martín, A. Google Scholar as a Data Source for Research Assessment. <italic>In</italic>: Glänzel, W. <italic>et al.</italic> (org.). <italic>Springer Handbook of Science and Technology Indicators</italic>. Cham: Springer, 2019. p. 95-127. Doi: https://doi.org/10.1007/978-3-030-02511-3_4.</mixed-citation>
                <element-citation publication-type="book">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Delgado López-Cózar</surname>
                            <given-names>E.</given-names>
                        </name>
                        <name>
                            <surname>Orduña-Malea</surname>
                            <given-names>E.</given-names>
                        </name>
                        <name>
                            <surname>Martín-Martín</surname>
                            <given-names>A.</given-names>
                        </name>
                    </person-group>
                    <chapter-title>Google Scholar as a Data Source for Research Assessment</chapter-title>
                    <person-group person-group-type="compiler">
                        <name>
                            <surname>Glänzel</surname>
                            <given-names>W.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <source>Springer Handbook of Science and Technology Indicators</source>
                    <publisher-loc>Cham</publisher-loc>
                    <publisher-name>Springer</publisher-name>
                    <year>2019</year>
                    <fpage>95</fpage>
                    <lpage>127</lpage>
                    <pub-id pub-id-type="doi">10.1007/978-3-030-02511-3_4</pub-id>
                </element-citation>
            </ref>
            <ref id="B08">
                <mixed-citation>Google Scholar. <italic>Google Scholar Metrics</italic>. 2024. Available from: https://scholar.google.com/intl/en/scholar/metrics.html#overview. Cited: Feb. 21, 2025.</mixed-citation>
                <element-citation publication-type="webpage">
                    <person-group person-group-type="author">
                        <collab>Google Scholar</collab>
                    </person-group>
                    <source>Google Scholar Metrics</source>
                    <year>2024</year>
                    <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://scholar.google.com/intl/en/scholar/metrics.html#overview">https://scholar.google.com/intl/en/scholar/metrics.html#overview</ext-link></comment>
                    <date-in-citation content-type="access-date">Feb. 21, 2025</date-in-citation>
                </element-citation>
            </ref>
            <ref id="B09">
                <mixed-citation>Hao, H. <italic>et al.</italic> Thirty-two years of IEEE VIS: Authors, fields of study and citations. <italic>IEEE Transactions on Visualization and Computer Graphics</italic>, v. 29, n. 1, p. 1016-1025, 2022. Doi: https://doi.org/10.1109/TVCG.2022.3209422.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Hao</surname>
                            <given-names>H</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Thirty-two years of IEEE VIS: Authors, fields of study and citations</article-title>
                    <source>IEEE Transactions on Visualization and Computer Graphics</source>
                    <volume>29</volume>
                    <issue>1</issue>
                    <fpage>1016</fpage>
                    <lpage>1025</lpage>
                    <year>2022</year>
                    <pub-id pub-id-type="doi">10.1109/TVCG.2022.3209422</pub-id>
                </element-citation>
            </ref>
            <ref id="B10">
                <mixed-citation>Jacsó, P. Google Scholar Metrics for Publications. <italic>Online Information Review</italic>, v. 36, n. 4, p. 604-619, 2012. Available from: https://www.emerald.com/insight/content/doi/10.1108/14684521211254121/full/html. Cited: Feb. 15, 2025.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Jacsó</surname>
                            <given-names>P.</given-names>
                        </name>
                    </person-group>
                    <article-title>Google Scholar Metrics for Publications</article-title>
                    <source>Online Information Review</source>
                    <volume>36</volume>
                    <issue>4</issue>
                    <fpage>604</fpage>
                    <lpage>619</lpage>
                    <year>2012</year>
                    <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://www.emerald.com/insight/content/doi/10.1108/14684521211254121/full/html">https://www.emerald.com/insight/content/doi/10.1108/14684521211254121/full/html</ext-link></comment>
                    <date-in-citation content-type="access-date">Feb. 15, 2025</date-in-citation>
                </element-citation>
            </ref>
            <ref id="B11">
                <mixed-citation>Leydesdorff, L.; Wouters, P.; Bornmann, L. Professional and citizen bibliometrics: complementarities and ambivalences in the development and use of indicators—a state-of-the-art report. <italic>Scientometrics</italic>, v. 109, p. 2129-2150, 2016. Doi: https://doi.org/10.1007/s11192-016-2150-8.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Leydesdorff</surname>
                            <given-names>L</given-names>
                        </name>
                        <name>
                            <surname>Wouters</surname>
                            <given-names>P</given-names>
                        </name>
                        <name>
                            <surname>Bornmann</surname>
                            <given-names>L</given-names>
                        </name>
                    </person-group>
                    <article-title>Professional and citizen bibliometrics: complementarities and ambivalences in the development and use of indicators—a state-of-the-art report</article-title>
                    <source>Scientometrics</source>
                    <volume>109</volume>
                    <fpage>2129</fpage>
                    <lpage>2150</lpage>
                    <year>2016</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-016-2150-8</pub-id>
                </element-citation>
            </ref>
            <ref id="B12">
                <mixed-citation>Martín-Martín, A. <italic>et al.</italic> Google Scholar, Microsoft Academic, Scopus, Dimensions, Web of Science e COCI do OpenCitations: uma comparação multidisciplinar de cobertura por meio de citações. <italic>Scientometrics</italic>, v. 126, p. 871-906, 2021. Doi: https://doi.org/10.1007/s11192-020-03690-4.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Martín-Martín</surname>
                            <given-names>A</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Google Scholar, Microsoft Academic, Scopus, Dimensions, Web of Science e COCI do OpenCitations: uma comparação multidisciplinar de cobertura por meio de citações</article-title>
                    <source>Scientometrics</source>
                    <volume>126</volume>
                    <fpage>871</fpage>
                    <lpage>906</lpage>
                    <year>2021</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-020-03690-4</pub-id>
                </element-citation>
            </ref>
            <ref id="B13">
                <mixed-citation>Mongeon, P.; Bowman, T. D.; Costas, R. An open data set of scholars on Twitter. <italic>Quantitative Science Studies</italic>, v. 4, n. 2, p. 314-324, 2023. Doi: https://doi.org/10.1162/qss_a_00250.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Mongeon</surname>
                            <given-names>P</given-names>
                        </name>
                        <name>
                            <surname>Bowman</surname>
                            <given-names>T. D</given-names>
                        </name>
                        <name>
                            <surname>Costas</surname>
                            <given-names>R</given-names>
                        </name>
                    </person-group>
                    <article-title>An open data set of scholars on Twitter</article-title>
                    <source>Quantitative Science Studies</source>
                    <volume>4</volume>
                    <issue>2</issue>
                    <fpage>314</fpage>
                    <lpage>324</lpage>
                    <year>2023</year>
                    <pub-id pub-id-type="doi">10.1162/qss_a_00250</pub-id>
                </element-citation>
            </ref>
            <ref id="B14">
                <mixed-citation>Okamura, K. A half-century of global collaboration in science and the “Shrinking World”. <italic>Quantitative Science Studies</italic>, v. 4, n. 4, p. 938-959, 2023. Doi: https://doi.org/10.1162/qss_a_00268.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Okamura</surname>
                            <given-names>K.</given-names>
                        </name>
                    </person-group>
                    <article-title>A half-century of global collaboration in science and the “Shrinking World”</article-title>
                    <source>Quantitative Science Studies</source>
                    <volume>4</volume>
                    <issue>4</issue>
                    <fpage>938</fpage>
                    <lpage>959</lpage>
                    <year>2023</year>
                    <pub-id pub-id-type="doi">10.1162/qss_a_00268</pub-id>
                </element-citation>
            </ref>
            <ref id="B15">
                <mixed-citation>Orduna-Malea, E.; Aytac, S.; Tran, C. Y. Universities through the eyes of bibliographic databases: a retroactive growth comparison of Google Scholar, Scopus and Web of Science. <italic>Scientometrics</italic>, v. 121, p. 433-450, 2019. Doi: https://doi.org/10.1007/s11192-019-03208-7.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Orduna-Malea</surname>
                            <given-names>E</given-names>
                        </name>
                        <name>
                            <surname>Aytac</surname>
                            <given-names>S</given-names>
                        </name>
                        <name>
                            <surname>Tran</surname>
                            <given-names>C. Y</given-names>
                        </name>
                    </person-group>
                    <article-title>Universities through the eyes of bibliographic databases: a retroactive growth comparison of Google Scholar, Scopus and Web of Science</article-title>
                    <source>Scientometrics</source>
                    <volume>121</volume>
                    <fpage>433</fpage>
                    <lpage>450</lpage>
                    <year>2019</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-019-03208-7</pub-id>
                </element-citation>
            </ref>
            <ref id="B16">
                <mixed-citation>Orduña-Malea, E.; Delgado López-Cózar, E. Google Scholar Metrics evolution: an analysis according to languages. <italic>Scientometrics</italic>, v. 98, p. 2353-2367, 2014. Doi: https://doi.org/10.1007/s11192-013-1164-8.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Orduña-Malea</surname>
                            <given-names>E</given-names>
                        </name>
                        <name>
                            <surname>Delgado López-Cózar</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>Google Scholar Metrics evolution: an analysis according to languages</article-title>
                    <source>Scientometrics</source>
                    <volume>98</volume>
                    <fpage>2353</fpage>
                    <lpage>2367</lpage>
                    <year>2014</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-013-1164-8</pub-id>
                </element-citation>
            </ref>
            <ref id="B17">
                <mixed-citation>Pinto, A. L. <italic>et al.</italic> Periódicos científicos brasileiros indexados no Google Scholar Metrics. <italic>Informação &amp; Sociedade</italic>: <italic>Estudos</italic>, v. 30, n. 4, p. 1-18, 2020. Available from: https://periodicos.ufpb.br/ojs2/index.php/ies/article/view/57048. Cited: Mar. 3, 2025.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Pinto</surname>
                            <given-names>A. L.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Periódicos científicos brasileiros indexados no Google Scholar Metrics</article-title>
                    <source>Informação &amp; Sociedade: Estudos</source>
                    <volume>30</volume>
                    <issue>4</issue>
                    <fpage>1</fpage>
                    <lpage>18</lpage>
                    <year>2020</year>
                    <comment>Available from: <ext-link ext-link-type="uri" xlink:href="https://periodicos.ufpb.br/ojs2/index.php/ies/article/view/57048">https://periodicos.ufpb.br/ojs2/index.php/ies/article/view/57048</ext-link></comment>
                    <date-in-citation content-type="access-date">Mar. 3, 2025</date-in-citation>
                </element-citation>
            </ref>
            <ref id="B18">
                <mixed-citation>Rodrigues, D.; Lopes, A. L.; Batista, F. Web of Science citation gaps: an automatic approach to detect indexed but missing citations. Symposium on Languages, Applications and Technologies, 12., 2023, Wadern. <italic>Proceedings</italic> [...]. Wadern: Schloss Dagstuhl, Leibniz Center for Informatics, 2023. Doi: https://doi.org/10.4230/OASIcs.SLATE.2023.5.</mixed-citation>
                <element-citation publication-type="confproc">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Rodrigues</surname>
                            <given-names>D</given-names>
                        </name>
                        <name>
                            <surname>Lopes</surname>
                            <given-names>A. L</given-names>
                        </name>
                        <name>
                            <surname>Batista</surname>
                            <given-names>F</given-names>
                        </name>
                    </person-group>
                    <comment>Web of Science citation gaps: an automatic approach to detect indexed but missing citations</comment>
                    <conf-name>Symposium on Languages, Applications and Technologies, 12</conf-name>
                    <conf-date>2023</conf-date>
                    <conf-loc>Wadern</conf-loc>
                    <source>Proceedings</source>
                    <comment>[...]</comment>
                    <publisher-loc>Wadern</publisher-loc>
                    <publisher-name>Schloss Dagstuhl, Leibniz Center for Informatics</publisher-name>
                    <year>2023</year>
                    <pub-id pub-id-type="doi">10.4230/OASIcs.SLATE.2023.5</pub-id>
                </element-citation>
            </ref>
            <ref id="B19">
                <mixed-citation>Scheidsteger, T.; Haunschild, R. Which of the metadata with relevance for bibliometrics are the same and which are different when switching from Microsoft Academic Graph to OpenAlex? <italic>El Profesional de la Información</italic>, v. 32, n. 2, 2023. Doi: https://doi.org/10.3145/epi.2023.mar.09.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Scheidsteger</surname>
                            <given-names>T</given-names>
                        </name>
                        <name>
                            <surname>Haunschild</surname>
                            <given-names>R</given-names>
                        </name>
                    </person-group>
                    <article-title>Which of the metadata with relevance for bibliometrics are the same and which are different when switching from Microsoft Academic Graph to OpenAlex?</article-title>
                    <source>El Profesional de la Información</source>
                    <volume>32</volume>
                    <issue>2</issue>
                    <year>2023</year>
                    <pub-id pub-id-type="doi">10.3145/epi.2023.mar.09</pub-id>
                </element-citation>
            </ref>
            <ref id="B20">
                <mixed-citation>Schnieders, K. <italic>et al.</italic> ORCID coverage in research institutions—Readiness for partially automated research reporting. <italic>Frontiers in Research Metrics and Analytics</italic>, v. 7, 2022. Doi: https://doi.org/10.3389/frma.2022.1010504.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Schnieders</surname>
                            <given-names>K.</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>ORCID coverage in research institutions—Readiness for partially automated research reporting</article-title>
                    <source>Frontiers in Research Metrics and Analytics</source>
                    <volume>7</volume>
                    <year>2022</year>
                    <pub-id pub-id-type="doi">10.3389/frma.2022.1010504</pub-id>
                </element-citation>
            </ref>
            <ref id="B21">
                <mixed-citation>Waltman, L. A review of the literature on citation impact indicators. <italic>Journal of Informetrics</italic>, v. 10, n. 2, p. 365-391, 2016. Doi: https://doi.org/10.1016/j.joi.2016.02.007.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Waltman</surname>
                            <given-names>L</given-names>
                        </name>
                    </person-group>
                    <article-title>A review of the literature on citation impact indicators</article-title>
                    <source>Journal of Informetrics</source>
                    <volume>10</volume>
                    <issue>2</issue>
                    <fpage>365</fpage>
                    <lpage>391</lpage>
                    <year>2016</year>
                    <pub-id pub-id-type="doi">10.1016/j.joi.2016.02.007</pub-id>
                </element-citation>
            </ref>
            <ref id="B22">
                <mixed-citation>Zhang, L. <italic>et al.</italic> Missing institutions in OpenAlex: possible reasons, implications, and solutions. <italic>Scientometrics</italic>, v. 129, p. 5869-5891, 2024. Doi: https://doi.org/10.1007/s11192-023-04923-y.</mixed-citation>
                <element-citation publication-type="journal">
                    <person-group person-group-type="author">
                        <name>
                            <surname>Zhang</surname>
                            <given-names>L</given-names>
                        </name>
                        <etal/>
                    </person-group>
                    <article-title>Missing institutions in OpenAlex: possible reasons, implications, and solutions</article-title>
                    <source>Scientometrics</source>
                    <volume>129</volume>
                    <fpage>5869</fpage>
                    <lpage>5891</lpage>
                    <year>2024</year>
                    <pub-id pub-id-type="doi">10.1007/s11192-023-04923-y</pub-id>
                </element-citation>
            </ref>
        </ref-list>
    </back>
</article>
