<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Nursing</journal-id><journal-id journal-id-type="publisher-id">nursing</journal-id><journal-id journal-id-type="index">33</journal-id><journal-title>JMIR Nursing</journal-title><abbrev-journal-title>JMIR Nursing</abbrev-journal-title><issn pub-type="epub">2562-7600</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v8i1e67197</article-id><article-id pub-id-type="doi">10.2196/67197</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Impact of Attached File Formats on the Performance of ChatGPT-4 on the Japanese National Nursing Examination: Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Taira</surname><given-names>Kazuya</given-names></name><degrees>RN, PHN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Itaya</surname><given-names>Takahiro</given-names></name><degrees>RN, MPH, DrPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yada</surname><given-names>Shuntaro</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Hiyama</surname><given-names>Kirara</given-names></name><degrees>RN, MPH</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hanada</surname><given-names>Ayame</given-names></name><degrees>RN, PHN, BHS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Human Health Sciences, Graduate School of Medicine, Kyoto University</institution><addr-line>53, Shogoinkawara-cho, Sakyo-ku</addr-line><addr-line>Kyoto</addr-line><country>Japan</country></aff><aff id="aff2"><institution>Department of Healthcare Epidemiology, Graduate School of Medicine and Public Health, Kyoto University</institution><addr-line>Kyoto</addr-line><country>Japan</country></aff><aff id="aff3"><institution>Graduate School of Science and Technology, Nara Institute of Science and Technology</institution><addr-line>Ikoma</addr-line><country>Japan</country></aff><aff id="aff4"><institution>Faculty of Library, Information and Media Science, University of Tsukuba</institution><addr-line>Tsukuba</addr-line><country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Borycki</surname><given-names>Elizabeth</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Sun</surname><given-names>Grace</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Liao</surname><given-names>Pei-Hung</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Arasteh</surname><given-names>Soroosh Tayebi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Kazuya Taira, RN, PHN, PhD, Human Health Sciences, Graduate School of Medicine, Kyoto University, 53, Shogoinkawara-cho, Sakyo-ku, Kyoto, 606-8507, Japan, 81 0757513927; 
<email>taira.kazuya.5m@kyoto-u.ac.jp</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>22</day><month>1</month><year>2025</year></pub-date><volume>8</volume><elocation-id>e67197</elocation-id><history><date date-type="received"><day>15</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>23</day><month>12</month><year>2024</year></date><date date-type="accepted"><day>26</day><month>12</month><year>2024</year></date></history><copyright-statement>&#x00A9; Kazuya Taira, Takahiro Itaya, Shuntaro Yada, Kirara Hiyama, Ayame Hanada. Originally published in JMIR Nursing (<ext-link ext-link-type="uri" xlink:href="https://nursing.jmir.org">https://nursing.jmir.org</ext-link>), 22.1.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Nursing, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://nursing.jmir.org/">https://nursing.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://nursing.jmir.org/2025/1/e67197"/><abstract><sec><title>Abstract</title><p>This research letter discusses the impact of different file formats on ChatGPT-4&#x2019;s performance on the Japanese National Nursing Examination, highlighting the need for standardized reporting protocols to enhance the integration of artificial intelligence in nursing education and practice.</p></sec></abstract><kwd-group><kwd>nursing examination</kwd><kwd>machine learning</kwd><kwd>ML</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>large language models</kwd><kwd>ChatGPT</kwd><kwd>generative AI</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Numerous generative artificial intelligences (AIs), exemplified by all versions of ChatGPT [<xref ref-type="bibr" rid="ref1">1</xref>] and Llama [<xref ref-type="bibr" rid="ref2">2</xref>], have been developed using large language models and evaluated in health care, particularly in nursing education [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>], successfully passing national nursing examinations in several countries [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Generative AIs are evolving to handle multimodal information, including text and images [<xref ref-type="bibr" rid="ref1">1</xref>]. 
However, previous evaluations have not assessed the impact of file formats [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Prompts, particularly long ones, can affect response accuracy owing to potential context loss or exceeded token limits [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. In this study, we hypothesized that the file format attached to prompts could affect the results of nursing research that uses generative AI and aimed to evaluate its impact on ChatGPT-4&#x2019;s performance on the Japanese National Nursing Examination. The findings of this study would be useful for improving the quality of reports on future nursing research that uses generative AI.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethics Approval</title><p>This study did not require ethical approval or informed consent, as the data analyzed were obtained from a published database from the Ministry of Health, Labour and Welfare.</p></sec><sec id="s2-2"><title>Generative AI Model</title><p>We used the original, unmodified GPT-4 (gpt-4&#x2010;1106-preview, accessed March 2024) without additional training, tuning, or data. ChatGPT, launched by OpenAI in 2022, with GPT-4 released in March 2023, is currently widely used.</p></sec><sec id="s2-3"><title>Input Data</title><p>The dataset included all 50 basic knowledge questions from the 2023 Japanese National Nursing Examination, along with 190 general questions. The passing standard for these basic knowledge questions was approximately 80%. ChatGPT-3.5 has consistently failed to meet this standard [<xref ref-type="bibr" rid="ref4">4</xref>], leading us to consider whether performance might vary based on file format. Questions were prepared in TEXT (.txt), DOCX (.docx), PDF (.pdf), and IMAGE (.jpg) formats and in a format that directly described all questions in the prompt (PROMPT-ONLY format). 
Although other formats, including CSV, JSON, XML, and Markdown, could be used to present questions and choices, we excluded them to maintain consistency and focus on more common formats.</p></sec><sec id="s2-4"><title>Prompt Engineering</title><p>The prompts for each file format are summarized in <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><boxed-text id="box1"><title> Prompts provided to ChatGPT-4. The files (mentioned at the end of the prompt for TXT, DOCX, PDF, and JPG formats) were made viewable via OpenAI&#x2019;s application programming interface (API) function: ASSISTANT (type = retrieval).</title><p/><p><bold>&#x003C;Prompt for PROMPT-ONLY format&#x003E;</bold></p><p><italic>You are an expert in the field of nursing. Answer the given questions briefly and numerically. {Question number}. {Question}. Options: (1) {Option 1}, (2) {Option 2}, (3) {Option 3}, (4) {Option 4}</italic></p><p/><disp-quote><p><bold>Example:</bold> 1. Which vessel sends blood from the fetus to the placenta in the fetal circulation? Options: (1) Common carotid artery, (2) Pulmonary artery, (3) Umbilical artery, and (4) Umbilical vein.</p></disp-quote><p/><p><bold>&#x003C;Prompt for TXT, DOCX, PDF, and JPG formats&#x003E;</bold></p><p><italic>You are an expert in the field of nursing. Answer briefly and numerically all questions given by the file.</italic></p></boxed-text></sec><sec id="s2-5"><title>Data Analyses</title><p>Prompts for all formats were processed for 100 iterations each; the median and IQR of the percentage of correct answers were calculated. Differences among the percentages of correct answers by the attached file format were compared using the Kruskal-Wallis test and Dunn-Bonferroni test. 
Statistical analyses were performed using Python (version 3.11.4) with the <italic>pandas</italic> (version 1.5.3) and <italic>matplotlib</italic> (version 3.7.1) libraries.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The median percentages of correct answers were 92% (IQR 64%&#x2010;94%), 92% (IQR 92%&#x2010;94%), 94% (IQR 94%&#x2010;96%), 87% (IQR 86%&#x2010;90%), and 26% (IQR 20%&#x2010;30%) for PROMPT-ONLY, TEXT, PDF, DOCX, and JPG formats, respectively. The differences between the attached formats were statistically significant in all pairs (<italic>P</italic>&#x003C;.01) except for the PROMPT-ONLY versus TEXT and PROMPT-ONLY versus DOCX pairs (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Performance evaluation of ChatGPT-4 on the Japanese National Nursing Examination by the attached file format. Outliers, shown as dots, are values below Q1 &#x2013; 1.5 &#x00D7; IQR or above Q3 + 1.5 &#x00D7; IQR.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="nursing_v8i1e67197_fig01.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>ChatGPT-4&#x2019;s performance on the Japanese National Nursing Examination varied significantly across file formats. The best performance was observed with PROMPT-ONLY, TEXT, and PDF formats (median scores &#x2265;92%), followed by DOCX (87%), and the worst performance was with JPG (26%). The PROMPT-ONLY format exhibited a larger IQR and more variability than TEXT, PDF, and DOCX formats. JPG&#x2019;s poor performance highlights a significant limitation of generative AI, which excels at processing digital text but struggles with interpreting text from images. This &#x201C;visual comprehension&#x201D; gap has critical implications for AI applications involving nondigital text sources. 
The variability in PROMPT-ONLY performance may reflect reduced accuracy with longer prompts [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>Therefore, to prepare for a future where generative AI is integrated into nursing practice and education [<xref ref-type="bibr" rid="ref10">10</xref>], it is crucial to understand the interaction between humans and generative AI, including the impact of input file formats. Additionally, it is essential to report the following aspects in a standardized manner:</p><list list-type="bullet"><list-item><p>Name and version of the generative AI model</p></list-item><list-item><p>Presence of additional training, tuning, or knowledge transfer</p></list-item><list-item><p>Prompt design and attached file formats</p></list-item><list-item><p>Response generation parameters, including the number of iterations, temperature settings, and maximum token count</p></list-item><list-item><p>Execution environment (if applicable)</p></list-item></list><p>However, as we only examined ChatGPT-4&#x2019;s performance on the Japanese National Nursing Examination and the impact of major file formats, investigations on other formats and AI models are warranted. Particularly, evaluating the performance of AI that specializes in image processing and image formats other than JPG and expanding the evaluations to include national nursing examinations in other countries and clinical questions in practice will be important in future research.</p></sec></body><back><ack><p>This study was supported by the Japan Society for the Promotion of Science (JSPS KAKENHI 22K17549). The funder played no role in the study design, data collection, analysis, interpretation, or writing of the report. We would like to thank Editage for the English-language editing. During the preparation of this work, the authors used DeepL and ChatGPT to improve the language and readability. 
The article was completely structured by author-oriented content; these artificial intelligence (AI) tools were only used to correct English expressions and check for grammar. Therefore, these AIs did not affect the results or interpretations. After using these tools, the authors reviewed and edited the content as necessary and take full responsibility for the content of the published article.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><collab>OpenAI</collab><name name-style="western"><surname>Achiam</surname><given-names>J</given-names> </name><name name-style="western"><surname>Adler</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 15, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Touvron</surname><given-names>H</given-names> </name><name name-style="western"><surname>Lavril</surname><given-names>T</given-names> </name><name name-style="western"><surname>Izacard</surname><given-names>G</given-names> </name><etal/></person-group><article-title>LLaMA: open and efficient foundation language models</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 27, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2302.13971</pub-id></nlm-citation></ref><ref 
id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Topaz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Peltonen</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Michalowski</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The ChatGPT effect: nursing education and generative artificial intelligence</article-title><source>J Nurs Educ</source><year>2024</year><month>02</month><day>5</day><fpage>1</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.3928/01484834-20240126-01</pub-id><pub-id pub-id-type="medline">38302101</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jin</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>HE</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>E</given-names> </name></person-group><article-title>Performance of ChatGPT-3.5 and GPT-4 in national licensing examinations for medicine, pharmacy, dentistry, and nursing: a systematic review and meta-analysis</article-title><source>BMC Med Educ</source><year>2024</year><month>09</month><day>16</day><volume>24</volume><issue>1</issue><fpage>1013</fpage><pub-id pub-id-type="doi">10.1186/s12909-024-05944-8</pub-id><pub-id pub-id-type="medline">39285377</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taira</surname><given-names>K</given-names> </name><name name-style="western"><surname>Itaya</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hanada</surname><given-names>A</given-names> </name></person-group><article-title>Performance of the large language model ChatGPT on the National Nurse Examinations in Japan: evaluation 
study</article-title><source>JMIR Nurs</source><year>2023</year><month>06</month><day>27</day><volume>6</volume><fpage>e47305</fpage><pub-id pub-id-type="doi">10.2196/47305</pub-id><pub-id pub-id-type="medline">37368470</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Su</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>LE</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>LH</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>YC</given-names> </name></person-group><article-title>Assessing question characteristic influences on ChatGPT&#x2019;s performance and response-explanation consistency: insights from Taiwan&#x2019;s Nursing Licensing Exam</article-title><source>Int J Nurs Stud</source><year>2024</year><month>05</month><volume>153</volume><fpage>104717</fpage><pub-id pub-id-type="doi">10.1016/j.ijnurstu.2024.104717</pub-id><pub-id pub-id-type="medline">38401366</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ratnayake</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name></person-group><article-title>A prompting framework to enhance language model output</article-title><conf-name>AI 2023: Advances in Artificial Intelligence: 36th Australasian Joint Conference on Artificial Intelligence</conf-name><conf-date>Nov 28 to Dec 1, 2023</conf-date><conf-loc>Brisbane, Australia</conf-loc><pub-id pub-id-type="doi">10.1007/978-981-99-8391-9_6</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name 
name-style="western"><surname>Levy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jacoby</surname><given-names>A</given-names> </name><name name-style="western"><surname>Goldberg</surname><given-names>Y</given-names> </name></person-group><article-title>Same task, more tokens: the impact of input length on the reasoning performance of large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 19, 2024</comment><pub-id pub-id-type="doi">10.18653/v1/2024.acl-long.818</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>ZY</given-names> </name><name name-style="western"><surname>Verma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Doshi-Velez</surname><given-names>F</given-names> </name><name name-style="western"><surname>Low</surname><given-names>BKH</given-names> </name></person-group><article-title>Understanding the relationship between prompts and response uncertainty in large language models</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 20, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2407.14845</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goldberg</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>L</given-names> </name><name name-style="western"><surname>Blumenthal</surname><given-names>D</given-names> </name><etal/></person-group><article-title>To do no harm &#x2014; and the most good &#x2014; with AI in health care</article-title><source>NEJM AI</source><year>2024</year><month>02</month><day>22</day><volume>1</volume><issue>3</issue><pub-id 
pub-id-type="doi">10.1056/AIp2400036</pub-id></nlm-citation></ref></ref-list></back></article>