<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Nursing</journal-id><journal-id journal-id-type="publisher-id">nursing</journal-id><journal-id journal-id-type="index">33</journal-id><journal-title>JMIR Nursing</journal-title><abbrev-journal-title>JMIR Nursing</abbrev-journal-title><issn pub-type="epub">2562-7600</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e82842</article-id><article-id pub-id-type="doi">10.2196/82842</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Performance of Large Language Models in the Japanese Public Health Nurse National Examination: Comparative Cross-Sectional Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Takahashi</surname><given-names>Yutaro</given-names></name><degrees>MHSc</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Kumakura</surname><given-names>Ryota</given-names></name><degrees>MHSc</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Okamoto</surname><given-names>Rie</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Omote</surname><given-names>Shizuko</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Faculty of Health Sciences, Institute of Medical, Pharmaceutical and Health Sciences, Kanazawa University</institution><addr-line>Kodatsuno 5-11-80</addr-line><addr-line>Kanazawa</addr-line><addr-line>Ishikawa</addr-line><country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Borycki</surname><given-names>Elizabeth</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Dovdon</surname><given-names>Baigalmaa</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>DeBlieck</surname><given-names>Connie</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yutaro Takahashi, MHSc, Faculty of Health Sciences, Institute of Medical, Pharmaceutical and Health Sciences, Kanazawa University, Kodatsuno 5-11-80, Kanazawa, Ishikawa, 920-0942, Japan, 81 76-265-2559; <email>y-takahashi@staff.kanazawa-u.ac.jp</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>20</day><month>2</month><year>2026</year></pub-date><volume>9</volume><elocation-id>e82842</elocation-id><history><date 
date-type="received"><day>22</day><month>08</month><year>2025</year></date><date date-type="rev-recd"><day>22</day><month>12</month><year>2025</year></date><date date-type="accepted"><day>03</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Yutaro Takahashi, Ryota Kumakura, Rie Okamoto, Shizuko Omote. Originally published in JMIR Nursing (<ext-link ext-link-type="uri" xlink:href="https://nursing.jmir.org">https://nursing.jmir.org</ext-link>), 20.2.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Nursing, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://nursing.jmir.org/">https://nursing.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://nursing.jmir.org/2026/1/e82842"/><abstract><sec><title>Background</title><p>Large language models (LLMs) have shown promising results on Japanese national medical and nursing examinations. However, no study has evaluated LLM performance on the Japanese Public Health Nurse National Examination, which requires specialized knowledge in community health and public health nursing practice.</p></sec><sec><title>Objective</title><p>This study aimed to compare the performance of multiple LLMs on the Japanese Public Health Nurse National Examination and evaluate their potential utility in public health nursing education.</p></sec><sec sec-type="methods"><title>Methods</title><p>Three LLMs were evaluated: GPT-4o, Claude Opus 4, and Gemini 2.5 Pro. All 110 questions from the 111th Public Health Nurse National Examination were administered using standardized prompts. Questions were classified by format (text vs figure or calculation), content (general vs situational), and selection type (single vs multiple choice). Accuracy rates and 95% CIs were calculated, with statistical comparisons performed using chi-square tests.</p></sec><sec sec-type="results"><title>Results</title><p>All LLMs exceeded the passing criterion (60%). The accuracy rates were as follows: 85.5% (94/110) for GPT-4o (95% CI 77.5%&#x2010;91.5%), 91.8% (101/110) for Claude Opus 4 (95% CI 85.0%&#x2010;96.2%), and 92.7% (102/110) for Gemini 2.5 Pro (95% CI 86.2%&#x2010;96.8%). No significant differences were found among the LLMs (<italic>P</italic>&#x003E;.99). However, all models showed lower accuracy on multiple-choice questions than on single-choice questions, with significant intramodel differences observed for GPT-4o (10/16, 62.5% vs 82/92, 89.1%; <italic>P</italic>=.01) and Claude Opus 4 (12/16, 75% vs 87/92, 94.6%; <italic>P</italic>=.03).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>LLMs demonstrated high performance on a public health nursing examination but showed limitations in complex reasoning requiring multiple-choice selection. 
These findings suggest the potential for LLM use as educational support tools while highlighting the need for cautious implementation in specialized nursing education.</p></sec></abstract><kwd-group><kwd>large language models</kwd><kwd>public health nursing</kwd><kwd>licensure</kwd><kwd>nursing</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>education</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>With advances in artificial intelligence (AI), large language models (LLMs) have gained attention in various fields. High-performance LLMs have been developed, including GPT-3.5, GPT-4, Anthropic&#x2019;s Claude, and Google&#x2019;s Gemini [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>], which have demonstrated their ability to generate contextually appropriate responses to complex questions. In Japan, the capabilities of these models have been evaluated using questions from the national medical and nursing examinations. Liu et al [<xref ref-type="bibr" rid="ref4">4</xref>] compared GPT-4o, GPT-4, Claude 3 Opus, and Gemini 1.5 Pro using Japan&#x2019;s national medical examinations and found that GPT-4o achieved the highest accuracy rate (89.2%) and that intermodel performance varied depending on the subject area and question format. Takagi et al [<xref ref-type="bibr" rid="ref5">5</xref>] also found that GPT-4 significantly outperformed GPT-3.5 in national medical examinations (79.9% vs 50.8%), revealing significant performance gaps between model generations. Taira et al [<xref ref-type="bibr" rid="ref6">6</xref>] reported that LLM accuracy declined for questions about pharmacology, social welfare, and related legal regulations on national nursing examinations, suggesting limitations in responding to questions requiring specialized knowledge and institutional understanding. These findings demonstrate LLM utility and associated challenges in Japanese language processing and professional examinations.</p><p>However, no study has compared the performance of multiple LLMs on the Japanese Public Health Nurse National Examination, which assesses the knowledge and skills necessary for public health nursing practice. The Public Health Nurse National Examination comprises content addressing community-based health issues, multidisciplinary collaboration, and public health nursing activities, requiring not only medical knowledge but also an understanding of Japan&#x2019;s community health systems [<xref ref-type="bibr" rid="ref7">7</xref>]. Compared with clinical medicine, public health nursing presents unique challenges for LLMs. While medical examinations primarily assess biomedical knowledge and clinical reasoning, public health nursing requires the integration of social determinants of health, public health policy knowledge, community-level interventions, and understanding of local health systems. These multifaceted aspects demand complex reasoning that simultaneously considers multiple factors, which may pose greater challenges for current LLMs. Therefore, this study aimed to compare and evaluate the performance of GPT-4o (OpenAI), Claude Opus 4 (Anthropic), and Gemini 2.5 Pro (Google AI) on questions from the Japanese Public Health Nurse National Examination to clarify the extent to which AI can respond to the specialized knowledge and skills necessary for public health nursing.
We believe our work has significance for examining the potential for AI use in future public health nursing education.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This comparative cross-sectional study evaluated the performance of 3 LLMs on the Japanese Public Health Nurse National Examination. We used a census sampling approach, analyzing all questions from the examination to compare accuracy rates across different LLMs and question types.</p></sec><sec id="s2-2"><title>Tested LLMs</title><p>Three representative LLMs were selected: OpenAI&#x2019;s GPT-4o [<xref ref-type="bibr" rid="ref1">1</xref>], Anthropic&#x2019;s Claude Opus 4 [<xref ref-type="bibr" rid="ref2">2</xref>], and Google&#x2019;s Gemini 2.5 Pro [<xref ref-type="bibr" rid="ref3">3</xref>].</p></sec><sec id="s2-3"><title>Japanese Public Health Nurse National Examination</title><p>The Japanese Public Health Nurse National Examination is conducted based on the Act on Public Health Nurses, Midwives, and Nurses to assess the knowledge and skills necessary for public health nurses [<xref ref-type="bibr" rid="ref8">8</xref>]. It includes 110 questions, classified as general or situational. General questions are worth 1 point each, whereas situational questions are worth 2 points each. The passing criterion is 60% correct answers for general and situational questions combined, although the percentage may be adjusted when inappropriate questions are excluded. The questions are in multiple-choice format, and examinees must select one or more correct answers from 4 or 5 options. The pass rate is typically 90% to 95%.</p>
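<p>To make the scoring scheme concrete, the following minimal sketch (our illustration, not part of the examination specification) computes the maximum score and passing threshold for the 111th examination analyzed in this study, which comprised 75 general and 35 situational questions, and verifies one score reported in the Results section.</p><preformat># Minimal sketch of the scoring scheme described above (Python).
# The 75/35 split reflects the 111th examination analyzed in this study.
N_GENERAL = 75        # general questions, 1 point each
N_SITUATIONAL = 35    # situational questions, 2 points each
PASS_FRACTION = 0.60  # passing criterion: 60% of the combined score

max_score = N_GENERAL * 1 + N_SITUATIONAL * 2  # 75 + 70 = 145 points
pass_threshold = PASS_FRACTION * max_score     # 87.0 points
print(f"Maximum score: {max_score}; passing threshold: {pass_threshold}")

# Cross-check against the Results section: GPT-4o answered 63 general
# and 31 situational questions correctly, giving 63 + 31 * 2 = 125 points.
gpt4o_score = 63 * 1 + 31 * 2
print(f"GPT-4o: {gpt4o_score}/{max_score} ({gpt4o_score / max_score:.1%})")
# Prints 125/145 (86.2%), matching the reported score rate.</preformat>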
</sec><sec id="s2-4"><title>Study Population and Question Selection</title><p>This study used a census sampling approach, including all 110 questions from the 111th Public Health Nurse National Examination administered in February 2025 [<xref ref-type="bibr" rid="ref9">9</xref>]. No sampling was conducted, as the entire population of examination questions was analyzed. For questions containing figures and tables, figures were processed as image data, whereas the question text was handled as text. No questions were designated as inappropriate by the Ministry of Health, Labour and Welfare; the passing criterion was 60% [<xref ref-type="bibr" rid="ref9">9</xref>]. The examination questions were classified according to format (text questions and figure or calculation questions), content (general and situational questions), and selection type (single- and multiple-choice questions).</p></sec><sec id="s2-5"><title>Prompt Engineering</title><p>Because prompt engineering significantly affects the generated output, the question input formats were standardized. On the basis of previous research [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>], 6 prompts corresponding to different types of questions were created (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Standardized prompts for different question types in the Japanese Public Health Nurse National Examination. Template placeholders (&#x201C;&#x003C;&#x201D; and &#x201C;&#x003E;&#x201D;) indicate where specific content was inserted for each question type.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Prompt type</td><td align="left" valign="bottom">Prompt template</td></tr></thead><tbody><tr><td align="left" valign="top">Prompt 1: general questions</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Please answer the following question in brief by selecting an option. Select one option unless otherwise specified. Question: &#x003C;Question content&#x003E; 1. &#x003C;Option 1&#x003E; 2. &#x003C;Option 2&#x003E; 3. &#x003C;Option 3&#x003E; 4. &#x003C;Option 4&#x003E; 5. &#x003C;Option 5&#x003E; (if applicable)&#x201D;</td></tr><tr><td align="left" valign="top">Prompt 2: situational questions</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Based on the following situational setting, please answer the question in brief by selecting an option. Select one option unless otherwise specified. Situation: &#x003C;Situational content&#x003E; Question: &#x003C;Question content&#x003E; 1. &#x003C;Option 1&#x003E; 2. &#x003C;Option 2&#x003E; 3. &#x003C;Option 3&#x003E; 4. &#x003C;Option 4&#x003E; 5. &#x003C;Option 5&#x003E; (if applicable)&#x201D;</td></tr><tr><td align="left" valign="top">Prompt 3: image questions</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Please review the following image and answer the question in brief by selecting an option. Select one option unless otherwise specified. Question: &#x003C;Question content&#x003E; 1. &#x003C;Option 1&#x003E; 2. &#x003C;Option 2&#x003E; 3. &#x003C;Option 3&#x003E; 4. &#x003C;Option 4&#x003E; 5. &#x003C;Option 5&#x003E; (if applicable)&#x201D;</td></tr><tr><td align="left" valign="top">Prompt 4: situational questions with images</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Based on the following situational setting, please review the image and answer the question in brief by selecting an option. Select one option unless otherwise specified. Situation: &#x003C;Situational content&#x003E; Question: &#x003C;Question content&#x003E; 1. &#x003C;Option 1&#x003E; 2. &#x003C;Option 2&#x003E; 3. &#x003C;Option 3&#x003E; 4. &#x003C;Option 4&#x003E; 5. &#x003C;Option 5&#x003E; (if applicable)&#x201D;</td></tr><tr><td align="left" valign="top">Prompt 5: calculation questions</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Please read the following question, review the mark sheet format in the image, and answer the question in brief by selecting an option. Provide your answer as a numerical value in brief. Question: &#x003C;Question content&#x003E;&#x201D;</td></tr><tr><td align="left" valign="top">Prompt 6: situational calculation questions</td><td align="left" valign="top">&#x201C;Japanese Public Health Nurse National Examination questions are presented. Please read the following situational setting and question, review the mark sheet format in the image, and answer the calculation question. Provide your answer as a numerical value in brief. Situation: &#x003C;Situational content&#x003E; Question: &#x003C;Question content&#x003E;&#x201D;</td></tr></tbody></table></table-wrap>
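<p>Although each question was entered manually into a chat window (see the Data Collection Procedures section) rather than through code, the templates in Table 1 lend themselves to programmatic assembly. The following minimal sketch (our illustration; the function name and the example question are hypothetical and not taken from the examination) shows how prompt 1 would be filled for a general question. Note that the actual prompts were administered in Japanese; Table 1 presents English translations.</p><preformat># Hypothetical illustration of filling prompt 1 (general questions)
# from Table 1; in the study itself, prompts were pasted manually
# into each model's chat window, in Japanese.

PROMPT_GENERAL = (
    "Japanese Public Health Nurse National Examination questions are "
    "presented. Please answer the following question in brief by "
    "selecting an option. Select one option unless otherwise specified. "
    "Question: {question} {options}"
)

def build_general_prompt(question, options):
    """Number the answer options and fill the prompt 1 template."""
    numbered = " ".join(f"{i}. {opt}" for i, opt in enumerate(options, 1))
    return PROMPT_GENERAL.format(question=question, options=numbered)

# Invented example (not an actual examination question):
print(build_general_prompt(
    "Which ministry publishes the official examination answers?",
    ["Ministry of Health, Labour and Welfare", "Cabinet Office",
     "Ministry of the Environment", "Japan Nursing Association"],
))</preformat>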
</sec><sec id="s2-6"><title>Data Collection Procedures</title><p>Questions were input on June 25 and 26, 2025. The question text and images were directly inserted into each LLM&#x2019;s chat window. Each question was input in a new independent chat window to avoid potential influence from previous responses. All questions were administered in Japanese using standardized prompts.</p><p>The definition of &#x201C;correct&#x201D; answers was based on the official answers published by the Ministry of Health, Labour and Welfare [<xref ref-type="bibr" rid="ref9">9</xref>]. Only answers that clearly matched the official correct answers and followed the instructions provided in the question text were considered &#x201C;correct.&#x201D; Ambiguous answers, evident mistakes, unclear responses, and responses selecting more options than instructed were considered incorrect. All responses from the LLMs were independently reviewed and scored by 2 authors (YT and RK), with any discrepancies resolved through discussion.</p></sec><sec id="s2-7"><title>Data Analysis</title><p>For each LLM, the number of correct answers, accuracy rates, 95% CIs, total scores, and score rates were calculated. Accuracy rates were compared across question format (text vs figure or calculation), question content (general vs situational), and selection type (single vs multiple choice) both between LLMs (inter-LLM comparison) and within each LLM (intra-LLM comparison). Numerical input calculation questions were excluded from the selection type analysis.</p><p>Statistical comparisons were performed using chi-square tests when all expected cell frequencies were &#x2265;5 and the Fisher exact test when any expected cell frequency was &#x003C;5. For multiple pairwise inter-LLM comparisons, the Bonferroni correction was applied to control for type I error. Statistical significance was set at <italic>P</italic>&#x2264;.05 (2-tailed). All statistical analyses were conducted using Stata (version 18.0; StataCorp LLC).</p>
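<p>To illustrate these analyses concretely, the following minimal sketch (our illustration; the published analyses were run in Stata, so this Python and SciPy reimplementation is an assumption, and minor numerical differences are possible) computes a Clopper-Pearson exact 95% CI for one accuracy rate, a Fisher exact test for one intramodel comparison with small expected cell frequencies, and the Bonferroni adjustment applied to the pairwise inter-LLM comparisons.</p><preformat># Illustrative reanalysis sketch (Python with SciPy); the published
# analyses were run in Stata 18.0, so results may differ slightly.
from scipy.stats import binomtest, fisher_exact

# Exact (Clopper-Pearson) 95% CI for the GPT-4o overall accuracy (94/110);
# this should reproduce the reported interval of 77.5% to 91.5%.
ci = binomtest(k=94, n=110).proportion_ci(confidence_level=0.95)
print(f"GPT-4o: 94/110 correct, 95% CI {ci.low:.3f} to {ci.high:.3f}")

# Intramodel comparison for GPT-4o: single-choice (82/92 correct) vs
# multiple-choice (10/16 correct). Some expected cell frequencies fall
# below 5 here, so the Fisher exact test is used instead of chi-square.
table = [[82, 92 - 82],   # single-choice: correct, incorrect
         [10, 16 - 10]]   # multiple-choice: correct, incorrect
_, p_value = fisher_exact(table, alternative="two-sided")
print(f"GPT-4o, single vs multiple choice: P={p_value:.3f}")

# Bonferroni adjustment for the 3 pairwise inter-LLM comparisons:
# each raw P value is multiplied by 3 and capped at 1.
def bonferroni(p_raw, n_comparisons=3):
    return min(p_raw * n_comparisons, 1.0)</preformat>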
</sec><sec id="s2-8"><title>Ethical Considerations</title><p>Ethics approval was not required because only data from a published database were analyzed.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>Detailed results are presented in <xref ref-type="table" rid="table2">Table 2</xref>. The accuracy rates for each LLM for all 110 questions were as follows: 85.5% (n=94) of the questions for GPT-4o (95% CI 77.5%&#x2010;91.5%), 91.8% (n=101) of the questions for Claude Opus 4 (95% CI 85.0%&#x2010;96.2%), and 92.7% (n=102) of the questions for Gemini 2.5 Pro (95% CI 86.2%&#x2010;96.8%). The corresponding scores were 86.2% (125/145), 91.7% (133/145), and 93.1% (135/145), respectively, all of which exceeded the passing criterion (60%). In terms of question characteristics, the accuracy rates for general questions (n=75) were as follows: 84% (n=63) for GPT-4o, 92% (n=69) for Claude Opus 4, and 92% (n=69) for Gemini 2.5 Pro. The corresponding rates for situational questions (n=35) were 88.6% (n=31), 91.4% (n=32), and 94.3% (n=33), respectively.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance of large language models (LLMs) on the 111th Public Health Nurse National Examination.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom" colspan="2">GPT-4o</td><td align="left" valign="bottom" colspan="2">Claude Opus 4</td><td align="left" valign="bottom" colspan="2">Gemini 2.5 Pro</td><td align="left" valign="bottom" colspan="3">LLM comparison <italic>P</italic> value</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Correct answers, n (%)</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Correct answers, n (%)</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Correct answers, n (%)</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">GPT-4o vs Claude Opus 4</td><td align="left" valign="bottom">GPT-4o vs Gemini 2.5 Pro</td><td align="left" valign="bottom">Claude Opus 4 vs Gemini 2.5 Pro</td></tr></thead><tbody><tr><td align="left" valign="top">Overall accuracy rate (n=110)</td><td align="left" valign="top">94 (85.5)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">101 (91.8)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">102 (92.7)</td><td align="left" valign="top">&#x2014;</td><td align="char" char="." valign="top">&#x003E;.99</td><td align="char" char="." valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top">Overall score (n=145)</td><td align="left" valign="top">125 (86.2)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">133 (91.7)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">135 (93.1)</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top" colspan="2">By question content</td><td align="char" char="." valign="top">.53</td><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003E;.99</td><td align="left" valign="top"/><td align="char" char="." 
valign="top">&#x003E;.99</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>General questions (n=75)</td><td align="left" valign="top">63 (84.0)</td><td align="left" valign="top"/><td align="left" valign="top">69 (92.0)</td><td align="left" valign="top"/><td align="left" valign="top">69 (92.0)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Situational questions (n=35)</td><td align="left" valign="top">31 (88.6)</td><td align="left" valign="top"/><td align="left" valign="top">32 (91.4)</td><td align="left" valign="top"/><td align="left" valign="top">33 (94.3)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top" colspan="2">By question format</td><td align="char" char="." valign="top">.69</td><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003E;.99</td><td align="left" valign="top"/><td align="char" char="." valign="top">.60</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Text questions (n=98)</td><td align="left" valign="top">84 (85.7)</td><td align="left" valign="top"/><td align="left" valign="top">90 (91.8)</td><td align="left" valign="top"/><td align="left" valign="top">90 (91.8)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Figure or calculation questions (n=12)</td><td align="left" valign="top">10 (83.3)</td><td align="left" valign="top"/><td align="left" valign="top">11 (91.7)</td><td align="left" valign="top"/><td align="left" valign="top">12 (100.0)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top" colspan="2">By selection type</td><td align="char" char="." valign="top">.04</td><td align="left" valign="top"/><td align="char" char="." valign="top">.03</td><td align="left" valign="top"/><td align="char" char="." 
valign="top">.09</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Single-choice questions (n=92)</td><td align="left" valign="top">82 (89.1)</td><td align="left" valign="top"/><td align="left" valign="top">87 (94.6)</td><td align="left" valign="top"/><td align="left" valign="top">87 (94.6)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Multiple-choice questions (n=16)</td><td align="left" valign="top">10 (62.5)</td><td align="left" valign="top"/><td align="left" valign="top">12 (75.0)</td><td align="left" valign="top"/><td align="left" valign="top">13 (81.3)</td><td align="left" valign="top"/><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>In terms of question format, the accuracy rates for the text questions (n=98) were as follows: 85.7% (n=84) for GPT-4o, 91.8% (n=90) for Claude Opus 4, and 91.8% (n=90) for Gemini 2.5 Pro. The corresponding rates for figure or calculation questions (n=12) were 83.3% (n=10), 91.7% (n=11), and 100% (n=12), respectively. The accuracy rates for single-choice questions (n=92) were as follows: 89.1% (n=82) for GPT-4o, 94.6% (n=87) for Claude Opus 4, and 94.6% (n=87) for Gemini 2.5 Pro. However, for the multiple-choice questions (n=16), all LLMs showed decreased accuracy, and the corresponding rates were 62.5% (n=10), 75% (n=12), and 81.3% (n=13), respectively.</p><p>Statistical comparisons among the LLMs showed no significant differences. Intra-LLM comparisons revealed significant differences between single- and multiple-choice questions for GPT-4o (<italic>P</italic>=.01) and Claude Opus 4 (<italic>P</italic>=.03), with the accuracy rates for multiple-choice questions being significantly lower.</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study compared and evaluated the performances of multiple LLMs on the Japanese Public Health Nurse National Examination. All the LLMs clearly exceeded the passing criterion of 60%. These results indicate that LLMs have acquired considerable specialized knowledge, with the performance of GPT-4o (94/110, 85.5%) being comparable to that of LLMs in previous medical examinations [<xref ref-type="bibr" rid="ref4">4</xref>] and superior to that of older-generation models [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>While all LLMs showed high overall accuracy rates, a clear performance decline was observed for multiple-choice questions. This contrasts with the high accuracy rates for single-choice questions, with significant differences found for GPT-4o and Claude Opus 4. This phenomenon demonstrates the current limitations of LLMs in complex reasoning, which requires the simultaneous evaluation of multiple concepts.
As public health nursing practice requires comprehensive judgment considering multiple factors such as regional characteristics, residents&#x2019; needs, social resources, and multidisciplinary collaboration, the performance decline on multiple-choice questions suggests that LLMs may have limitations in the complex decision-making required in actual public health nursing practice.</p><p>The accuracy rates of the LLMs evaluated in this study (94/110, 85.5% to 102/110, 92.7%) and their scores (125/145, 86.2% to 135/145, 93.1%) substantially exceeded the passing standard (60%). The pass rate for the 111th Public Health Nurse National Examination was 94% [<xref ref-type="bibr" rid="ref9">9</xref>]; however, this represents the proportion of examinees who met or exceeded the passing standard, and the overall mean score for all examinees is not publicly available. Therefore, a direct comparison between the overall academic performance of examinees and LLM performance is challenging.</p><p>In the field of medical education, multiple studies comparing LLM and student performance have been reported. A study comparing final-year emergency medicine students with AI models [<xref ref-type="bibr" rid="ref10">10</xref>] demonstrated that students achieved a 79.4% accuracy rate, outperforming ChatGPT (72.5%) and Gemini (54.4%). The superiority of students was particularly pronounced in image-based questions, highlighting current limitations in AI models&#x2019; visual information processing capabilities. Additionally, a study using 1070 medical imaging questions [<xref ref-type="bibr" rid="ref11">11</xref>] found that GPT-4 correctly answered 67.8% of the questions it attempted, whereas the students&#x2019; passing mean was 63%. However, the student majority vote achieved a 94.5% accuracy rate, substantially surpassing the AI. This demonstrates that even when individual students&#x2019; abilities may be equal to or slightly inferior to those of AI, collective student judgment can substantially exceed AI performance.</p><p>In our study, while LLM performance on figure or calculation questions was high (10/12, 83.3% to 12/12, 100%), the small number of questions (n=12) necessitates larger-scale validation. More importantly, the learning processes of LLMs and humans are fundamentally different. LLMs learn patterns from large volumes of text data, whereas public health nurses acquire decision-making capabilities by integrating practical experience with theoretical knowledge. Furthermore, human public health nurses possess essential practical competencies that are not measurable through written examinations, including ethical judgment, empathy, and interpersonal communication skills. As these previous studies [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] demonstrate, while AI shows potential as a supplementary educational tool, it cannot replace human capabilities, particularly in areas requiring visual interpretation, clinical reasoning, and collective judgment. Therefore, LLMs should be appropriately positioned as educational and learning support tools rather than as replacements for human public health nurses.</p><p>The results of this study have important implications from the perspective of competency development in public health nursing education.
The &#x201C;Practical Competencies Required of Public Health Nurses and Achievement Goals and Levels at Graduation&#x201D; document by the Japanese Ministry of Health, Labour and Welfare [<xref ref-type="bibr" rid="ref12">12</xref>] classifies public health nurse competencies into 5 domains: (1) ability to clarify community health issues and develop plans; (2) ability to provide continuous support and collaborative organizational activities for individuals, families, groups, and organizations to enhance community health promotion capacity and evaluate these activities; (3) community health crisis management capacity; (4) ability to develop projects, policies, social resources, and systems to enhance community health levels; and (5) professional autonomy and continuous quality improvement capacity. LLMs may be particularly effective in providing learning support during information gathering and assessment stages within domain 1. This competency includes information collection for clarifying community health issues, community diagnosis, and prioritization of health issues, where LLMs are expected to play a supplementary role in confirming foundational knowledge and organizing information.</p><p>For domain 1, the achievement level at graduation is set at either level 1 (&#x201C;able to implement independently with minimal guidance&#x201D;) or level 2 (&#x201C;able to implement under supervision [from supervising public health nurses or faculty]&#x201D;) [<xref ref-type="bibr" rid="ref12">12</xref>]. As revealed in this study, LLMs demonstrated a performance decline in multiple-choice questions and have limitations in complex judgment tasks. Therefore, when using LLMs as educational tools, it is crucial to cultivate students&#x2019; ability to critically evaluate LLM outputs and maintain practical judgment skills based on community characteristics to reach these achievement levels.</p><p>Several ethical considerations must be addressed when using LLMs in public health nursing education. The Japanese Ministry of Education, Culture, Sports, Science and Technology guidelines [<xref ref-type="bibr" rid="ref13">13</xref>] indicate that directly using generative AI outputs does not deepen students&#x2019; own learning, that differences in generative AI types (paid vs free versions) may create disparities in student outcomes, leading to unfairness, and that confidential and personal information may be unintentionally leaked or disclosed. A survey of Japanese medical students [<xref ref-type="bibr" rid="ref14">14</xref>] found that while 41.9% had experience using ChatGPT, only 10.2% had used it for medical assignments and 47% held negative views about its use for medical reports. Many students felt that, considering the time required to verify AI responses, independent learning would be more efficient, highlighting the essential need to cultivate critical evaluation skills for LLM outputs. A narrative review on chatbot integration in nursing education [<xref ref-type="bibr" rid="ref15">15</xref>] also emphasizes the importance of ethical considerations, indicating the urgent need to establish ethical frameworks for AI use across nursing education.</p><p>The results of this study revealed that while LLMs demonstrated high accuracy rates on the Public Health Nurse National Examination, performance declined on multiple-choice questions. This finding has important implications for using LLMs as learning support tools in public health nursing education.
Given the demonstrated limitations of LLMs in complex judgment requiring simultaneous consideration of multiple factors, LLMs are suitable for supplementary roles such as confirming foundational knowledge and gathering information, whereas faculty instruction remains crucial for learning scenarios requiring complex judgment. The Ministry of Education, Culture, Sports, Science and Technology has issued guidelines [<xref ref-type="bibr" rid="ref13">13</xref>] on the educational use of generative AI at universities and colleges of technology, and public health nurse training institutions are also called upon to develop guidelines that clearly specify appropriate use scenarios and limitations for LLMs. A nationwide survey on information and communications technology (ICT) use among public health nurses in local governments [<xref ref-type="bibr" rid="ref16">16</xref>] found that 82.8% responded that they did not know the procedures for promoting ICT use, indicating challenges in adapting to digital technology even among practicing public health nurses. This suggests the importance of providing systematic digital literacy education from the public health nurse training stage.</p><p>A detailed examination is needed regarding curriculum integration and faculty training. For example, development of specific implementation strategies is required, including in which courses and how LLMs should be introduced, how to design a phased introduction process, and how faculty should learn appropriate LLM use methods. In particular, establishing organizational training systems for enhancing faculty AI literacy and developing assessment methods premised on LLM use are important future challenges. To accumulate knowledge regarding these strategies, pilot program implementation and evaluation will be necessary.</p><p>As practical implications, there is potential for the use of LLMs as continuing education and self-directed learning support tools for practicing public health nurses. A concept analysis of LLMs in nursing education [<xref ref-type="bibr" rid="ref17">17</xref>] positions LLMs as transformative tools that provide accessible and personalized learning support and promote cognitive and skill development. In public health nursing education as well, LLMs are expected to be useful for responding to new public health issues and during the information gathering stages of community diagnosis. Additionally, there is potential for the use of LLMs as an auxiliary tool in situations requiring rapid information organization, such as during disasters or emerging infectious disease outbreaks. However, a survey on ICT use among public health nurses in local governments [<xref ref-type="bibr" rid="ref16">16</xref>] found that 89.1% of municipalities expressed concerns about individuals who have difficulty adapting to digital technology, necessitating careful introduction that considers the essence of interpersonal support in public health nursing work. Furthermore, while 55.9% in the same survey actively promoted ICT use, only 26.7% perceived progress as smooth, indicating challenges both in digital literacy education for practicing public health nurses and in establishing organizational support systems. As noted in the aforementioned concept analysis [<xref ref-type="bibr" rid="ref17">17</xref>], careful attention must be paid to LLM limitations and ethical implications, ensuring that LLM integration aligns with the values and goals of nursing education.
Therefore, when using LLMs in practical settings, it is essential to critically evaluate LLM outputs and integrate them with community characteristics and practical knowledge, considering the limitations in complex judgment revealed in this study.</p><p>The findings of this study occupy an important position within the broader context of AI use in health profession education. A narrative review on chatbots in nursing education [<xref ref-type="bibr" rid="ref15">15</xref>] demonstrated a surge in related research from 2021 to 2023 (with 2023 accounting for 70% of publications), indicating growing scholarly interest in this field. Together with LLM evaluation studies on medical licensing examinations [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], it is becoming increasingly clear that LLMs demonstrate high performance across health professional licensing examinations generally. However, the performance decline on multiple-choice questions demonstrated in this study indicates the existence of LLM limitations in complex judgment unique to public health nursing, such as integration of social determinants of health and planning of community-level interventions. This is consistent with findings from previous studies in medical education [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] showing that collective student judgment far exceeds that of AI, supporting the appropriate positioning of LLMs as educational and learning support tools rather than replacements for human health professionals. Additionally, a survey of medical students [<xref ref-type="bibr" rid="ref14">14</xref>] showed that 47% held negative views about LLM use for medical assignments, indicating that students recognize the need to verify LLM outputs. A concept analysis in nursing education [<xref ref-type="bibr" rid="ref17">17</xref>] also pointed out the need for careful consideration of LLM limitations and ethical implications. Furthermore, the aforementioned review on chatbot integration in nursing education [<xref ref-type="bibr" rid="ref15">15</xref>] emphasizes the importance of ethical considerations and the urgency of original research while acknowledging it as a promising field. Collectively, these findings suggest that, across health profession education generally, LLMs have potential as useful auxiliary tools, but cultivating the ability to understand their limitations and use them critically remains a common challenge.</p></sec><sec id="s4-2"><title>Limitations and Future Directions</title><p>This study has the following strengths. First, this is the first study to evaluate the performance of multiple LLMs on the Public Health Nurse National Examination. While LLM evaluations have been conducted on medical and nursing licensing examinations, this study represents the first systematic evaluation in the public health nursing field. Second, reproducibility was ensured through the use of standardized prompts. Six prompts were created according to question types, achieving consistent evaluation. Third, detailed analysis by question format revealed the important finding of performance decline on multiple-choice questions. This finding demonstrates LLM limitations in complex reasoning requiring the simultaneous consideration of multiple factors, with important implications for future educational implementation.</p><p>This study has several limitations.
First, because this was a cross-sectional evaluation of a single examination year, temporal changes in LLM performance and reproducibility across examination years could not be evaluated. Second, accuracy rates alone do not clarify the quality of reasoning processes or their correlation with actual public health nursing practice competencies. Third, while LLM versions and settings may influence results, this study was limited to evaluation using specific versions (GPT-4o, Claude Opus 4, and Gemini 2.5 Pro). Fourth, results may vary depending on prompt expression methods, and there is no guarantee that the standardized prompts used in this study are optimal. Fifth, because the overall mean score for examinees is not publicly available, direct performance comparison between LLMs and human public health nurse examinees is difficult. Furthermore, this study verified LLM performance in the educational evaluation context of a national examination and did not evaluate their utility or safety as decision support tools in actual public health nursing practice. Future research should include continued evaluation over multiple years, qualitative analysis of reasoning processes, validation of utility in practical settings, comparative studies with human public health nurses, and examination of applicability to decision support in actual practice.</p></sec><sec id="s4-3"><title>Conclusions</title><p>The LLMs evaluated demonstrated high performance on the Public Health Nurse National Examination; however, they also had limitations in solving problems requiring complex judgment. These findings provide important foundational data showing the possibilities and challenges of AI use in public health nursing. On the basis of these results, LLMs should be cautiously used as supplementary tools in public health nursing education.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>This work was supported by a Japan Society for the Promotion of Science Grant-in-Aid for Scientific Research (grant 25K14186).</p></sec><sec><title>Data Availability</title><p>The data supporting the findings of this study are available from the corresponding author (YT) upon request.</p></sec></notes><fn-group><fn fn-type="con"><p>YT contributed to conceptualization, methodology, investigation, funding acquisition, supervision, and manuscript review and editing. RK contributed to methodology, investigation, data curation, formal analysis, and writing of the original draft.
RO and SO contributed to manuscript review and editing.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ICT</term><def><p>information and communications technology</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Hello GPT-4o</article-title><source>OpenAI</source><year>2024</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/index/hello-gpt-4o/">https://openai.com/index/hello-gpt-4o/</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Introducing Claude 4</article-title><source>Anthropic</source><year>2025</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.anthropic.com/news/claude-4">https://www.anthropic.com/news/claude-4</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Gemini 2.5 Pro</article-title><source>Google Cloud</source><year>2025</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro">https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-pro</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Okuhara</surname><given-names>T</given-names> </name><name name-style="western"><surname>Dai</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Evaluating the effectiveness of advanced large language models in medical knowledge: a comparative study using Japanese national medical examination</article-title><source>Int J Med Inform</source><year>2025</year><month>01</month><volume>193</volume><fpage>105673</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105673</pub-id><pub-id pub-id-type="medline">39471700</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Takagi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Watari</surname><given-names>T</given-names> </name><name name-style="western"><surname>Erabi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sakaguchi</surname><given-names>K</given-names> </name></person-group><article-title>Performance of GPT-3.5 and GPT-4 on the Japanese Medical Licensing Examination: comparison study</article-title><source>JMIR Med Educ</source><year>2023</year><month>06</month><day>29</day><volume>9</volume><fpage>e48002</fpage><pub-id pub-id-type="doi">10.2196/48002</pub-id><pub-id pub-id-type="medline">37384388</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taira</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Itaya</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hanada</surname><given-names>A</given-names> </name></person-group><article-title>Performance of the large language model ChatGPT on the National Nurse Examinations in Japan: evaluation study</article-title><source>JMIR Nurs</source><year>2023</year><month>06</month><day>27</day><volume>6</volume><fpage>e47305</fpage><pub-id pub-id-type="doi">10.2196/47305</pub-id><pub-id pub-id-type="medline">37368470</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>About the "National Examination Question Standards for Public Health Nurses, Midwives, and Nurses, 2023 Edition"</article-title><source>Ministry of Health, Labor and Welfare Japan</source><year>2022</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mhlw.go.jp/stf/shingi2/0000159020_00001.html">https://www.mhlw.go.jp/stf/shingi2/0000159020_00001.html</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><article-title>Act on Public Health Nurses, Midwives, and Nurses</article-title><source>Japanese Law Translation</source><year>2009</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.japaneselawtranslation.go.jp/ja/laws/view/3993">https://www.japaneselawtranslation.go.jp/ja/laws/view/3993</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>Announcement of results for the 111th National Public Health Nurse Examination, the 108th National Midwife Examination, and the 114th National Nurse Examination</article-title><source>Ministry of Health, Labour and Welfare Japan</source><year>2025</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mhlw.go.jp/general/sikaku/successlist/2025/siken03_04_05/about.html">https://www.mhlw.go.jp/general/sikaku/successlist/2025/siken03_04_05/about.html</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Thani</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Anjum</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bhutta</surname><given-names>ZA</given-names> </name><etal/></person-group><article-title>Comparative performance of ChatGPT, Gemini, and final-year emergency medicine clerkship students in answering multiple-choice questions: implications for the use of AI in medical education</article-title><source>Int J Emerg Med</source><year>2025</year><month>08</month><day>7</day><volume>18</volume><issue>1</issue><fpage>146</fpage><pub-id pub-id-type="doi">10.1186/s12245-025-00949-6</pub-id><pub-id pub-id-type="medline">40775272</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roos</surname><given-names>J</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kaczmarczyk</surname><given-names>R</given-names> </name></person-group><article-title>Evaluating Bard Gemini Pro and GPT-4 Vision against student performance in medical visual question answering: 
comparative case study</article-title><source>JMIR Form Res</source><year>2024</year><month>12</month><day>17</day><volume>8</volume><fpage>e57592</fpage><pub-id pub-id-type="doi">10.2196/57592</pub-id><pub-id pub-id-type="medline">39714199</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="web"><article-title>Guidelines for the management of nursing schools revised</article-title><source>Ministry of Health, Labour and Welfare Japan</source><year>2023</year><access-date>2025-08-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mhlw.go.jp/kango_kyouiku/news/4.html">https://www.mhlw.go.jp/kango_kyouiku/news/4.html</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="web"><article-title>Regarding the educational aspects of generative AI at universities and technical colleges</article-title><source>Ministry of Education, Culture, Sports, Science and Technology Japan</source><year>2023</year><access-date>2025-12-18</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mext.go.jp/b_menu/houdou/2023/mext_01260.html">https://www.mext.go.jp/b_menu/houdou/2023/mext_01260.html</ext-link></comment></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amano</surname><given-names>I</given-names> </name><name name-style="western"><surname>Obi-Nagata</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ninomiya</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fujiwara</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Koibuchi</surname><given-names>N</given-names> </name></person-group><article-title>Use and evaluation of generative artificial intelligence by medical students in Japan</article-title><source>JMA J</source><year>2025</year><month>07</month><day>2</day><volume>8</volume><issue>3</issue><fpage>730</fpage><lpage>735</lpage><pub-id pub-id-type="doi">10.31662/jmaj.2024-0375</pub-id><pub-id pub-id-type="medline">40786463</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>S</given-names> </name></person-group><article-title>Evolution of chatbots in nursing education: narrative review</article-title><source>JMIR Med Educ</source><year>2024</year><month>06</month><day>13</day><volume>10</volume><fpage>e54987</fpage><pub-id pub-id-type="doi">10.2196/54987</pub-id><pub-id pub-id-type="medline">38889074</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Akatsuka</surname><given-names>E</given-names> </name><name name-style="western"><surname>Taguchi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yoshida</surname><given-names>C</given-names> </name><name name-style="western"><surname>Miyagawa</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Sugiyama</surname><given-names>D</given-names> </name></person-group><article-title>Current status and challenges in the utilization of information and communication technology and digitalization in public health nursing activities: a national survey of supervisory public health nurses in local governments</article-title><source>Nihon Koshu Eisei Zasshi</source><year>2025</year><month>10</month><day>9</day><volume>72</volume><issue>9</issue><fpage>606</fpage><lpage>615</lpage><pub-id pub-id-type="doi">10.11236/jph.24-095</pub-id><pub-id pub-id-type="medline">40484688</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harrington</surname><given-names>J</given-names> </name><name name-style="western"><surname>Booth</surname><given-names>RG</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>KT</given-names> </name></person-group><article-title>Large language models in nursing education: concept analysis</article-title><source>JMIR Nurs</source><year>2025</year><month>08</month><day>22</day><volume>8</volume><fpage>e77948</fpage><pub-id pub-id-type="doi">10.2196/77948</pub-id><pub-id pub-id-type="medline">40845300</pub-id></nlm-citation></ref></ref-list></back></article>