<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD with OASIS Tables with MathML3 v1.3 20210610//EN" "JATS-journalpublishing-oasis-article1-3-mathml3.dtd">
<article article-type="research-article" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:oasis="http://www.niso.org/standards/z39-96/ns/oasis-exchange/table"><front><journal-meta><journal-id journal-id-type="publisher-id">PRD</journal-id><journal-id journal-id-type="coden">PRVDAQ</journal-id><journal-title-group><journal-title>Physical Review D</journal-title><abbrev-journal-title>Phys. Rev. D</abbrev-journal-title></journal-title-group><issn pub-type="ppub">2470-0010</issn><issn pub-type="epub">2470-0029</issn><publisher><publisher-name>American Physical Society</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.1103/PhysRevD.110.045020</article-id><article-categories><subj-group subj-group-type="toc-major"><subject>ARTICLES</subject></subj-group><subj-group subj-group-type="toc-minor"><subject>Formal aspects of field theory, field theory in curved space</subject></subj-group></article-categories><title-group><article-title>Learning <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>S</mml:mi></mml:mrow></mml:math></inline-formula>-matrix phases with neural operators</article-title><alt-title alt-title-type="running-title">LEARNING <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-MATRIX PHASES WITH NEURAL OPERATORS</alt-title><alt-title alt-title-type="running-author">VASILIS NIARCHOS AND CONSTANTINOS PAPAGEORGAKIS</alt-title></title-group><contrib-group><contrib contrib-type="author"><contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-3826-4314</contrib-id><name><surname>Niarchos</surname><given-names>Vasilis</given-names></name><xref ref-type="aff" rid="a1"><sup>1</sup></xref><xref ref-type="author-notes" rid="n1"><sup>,*</sup></xref></contrib><contrib contrib-type="author"><contrib-id authenticated="true" 
contrib-id-type="orcid">https://orcid.org/0000-0001-6760-5942</contrib-id><name><surname>Papageorgakis</surname><given-names>Constantinos</given-names></name><xref ref-type="aff" rid="a2"><sup>2</sup></xref><xref ref-type="author-notes" rid="n2"><sup>,†</sup></xref></contrib><aff id="a1"><label><sup>1</sup></label>ITCP and CCTP, Department of Physics, <institution-wrap><institution>University of Crete</institution><institution-id institution-id-type="ror">https://ror.org/00dr28g20</institution-id></institution-wrap>, 71003 Heraklion, Greece</aff><aff id="a2"><label><sup>2</sup></label>Centre for Theoretical Physics, Department of Physics and Astronomy, <institution-wrap><institution>Queen Mary University of London</institution><institution-id institution-id-type="ror">https://ror.org/026zzn846</institution-id></institution-wrap>, London E1 4NS, United Kingdom</aff></contrib-group><author-notes><fn id="n1"><label><sup>*</sup></label><p>Contact author: <email>niarchos@physics.uoc.gr</email></p></fn><fn id="n2"><label><sup>†</sup></label><p>Contact author: <email>c.papageorgakis@qmul.ac.uk</email></p></fn></author-notes><pub-date iso-8601-date="2024-08-23" date-type="pub" publication-format="electronic"><day>23</day><month>August</month><year>2024</year></pub-date><pub-date iso-8601-date="2024-08-15" date-type="pub" publication-format="print"><day>15</day><month>August</month><year>2024</year></pub-date><volume>110</volume><issue>4</issue><elocation-id>045020</elocation-id><pub-history><event><date iso-8601-date="2024-05-21" date-type="received"><day>21</day><month>May</month><year>2024</year></date></event><event><date iso-8601-date="2024-08-01" date-type="accepted"><day>1</day><month>August</month><year>2024</year></date></event></pub-history><permissions><copyright-statement>Published by the American Physical Society</copyright-statement><copyright-year>2024</copyright-year><copyright-holder>authors</copyright-holder><license license-type="creative-commons" 
xlink:href="https://creativecommons.org/licenses/by/4.0/"><license-p content-type="usage-statement">Published by the American Physical Society under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International</ext-link> license. Further distribution of this work must maintain attribution to the author(s) and the published article’s title, journal citation, and DOI. Funded by SCOAP<sup>3</sup>.</license-p></license></permissions><related-article ext-link-type="doi" xlink:href="10.48550/arXiv.2404.14551" related-article-type="preprint"/><abstract><p>We use Fourier neural operators (FNOs) to study the relation between the modulus and phase of amplitudes in <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> elastic scattering at fixed energies. Unlike previous approaches, we do not employ the integral relation imposed by unitarity, but instead train FNOs to discover it from many samples of amplitudes with finite partial wave expansions. When trained only on true samples, the FNO correctly predicts (unique or ambiguous) phases of amplitudes with infinite partial wave expansions. When also trained on false samples, it can rate the quality of its prediction by producing a true/false classifying index. We observe that the value of this index is strongly correlated with the violation of the unitarity constraint for the predicted phase and present examples where it delineates the boundary between allowed and disallowed profiles of the modulus. Our application of FNOs is unconventional: it involves a simultaneous regression-classification task and emphasizes the role of statistics in ensembles of neural operators. 
We comment on the merits and limitations of the approach and its potential as a new methodology in theoretical physics.</p></abstract><funding-group><award-group award-type="project"><funding-source country="GR"><institution-wrap><institution>Hellenic Foundation for Research and Innovation</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open-funder-registry">10.13039/501100013209</institution-id></institution-wrap></funding-source><award-id>15384</award-id></award-group><award-group award-type="unspecified"><funding-source country="EU"><institution-wrap><institution>European Commission</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open-funder-registry">10.13039/501100000780</institution-id></institution-wrap></funding-source></award-group><award-group award-type="unspecified"><funding-source country=""><institution-wrap><institution>NextGenerationEU</institution></institution-wrap></funding-source></award-group><award-group award-type="grant"><funding-source country="GB"><institution-wrap><institution>Science and Technology Facilities Council</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open-funder-registry">10.13039/501100000271</institution-id></institution-wrap></funding-source><award-id>ST/T000686/1</award-id><award-id>ST/X00063X/1</award-id></award-group><award-group award-type="grant"><funding-source country="GB"><institution-wrap><institution>Engineering and Physical Sciences Research Council</institution><institution-id institution-id-type="doi" vocab="open-funder-registry" vocab-identifier="10.13039/open-funder-registry">10.13039/501100000266</institution-id></institution-wrap></funding-source><award-id>EP/T022108/1</award-id></award-group><award-group award-type="unspecified"><funding-source country=""><institution-wrap><institution>HPC Midlands+ 
Consortium</institution></institution-wrap></funding-source></award-group></funding-group><counts><page-count count="19"/></counts></article-meta></front><body><sec id="s1"><label>I.</label><title>INTRODUCTION</title><p>The vast majority of problems in physics and mathematics involve the study of different types of functional relations. In general terms, these relations can be viewed as maps between infinite-dimensional spaces of functions. Sometimes, the origin of these maps is well understood. For example, a function may be obtained as the solution to an integral or differential equation that involves other input functions (e.g. functions that specify the form of the equation, boundary conditions, etc.). Analytic solutions are usually tractable only in special cases, while generic situations are computationally hard and require approximate schemes and numerical methods.</p><p>There are also many contexts where the rules dictating the map of interest are either poorly understood or beyond the reach of the existing framework. This is common for interacting, nonperturbative quantum field theories (QFTs). For example, in QFTs with a standard Lagrangian formulation, one would like to understand the map between spacetime-dependent deformations of the action by arbitrary operators, expressed by source functions in spacetime, and the partition function of the theory (or its functional derivative with respect to the sources). The partition function contains all the necessary information about the local correlation functions of the QFT, which are some of the main objects of interest in quantum physics. 
The traditional computation of the partition function goes through a path integral, which is typically difficult to evaluate and in many cases also difficult to properly define.<fn id="fn1"><label><sup>1</sup></label><p>Note that in the (super)gravity limit of the AdS/CFT correspondence <xref ref-type="bibr" rid="c1">[1]</xref> the map between sources and functional derivatives of the partition function reduces to the solution of partial differential equations in classical gravity with suitable boundary conditions. This translates the QFT problem back to the study of functional relations in the context of differential equations mentioned above.</p></fn></p><p>In recent years, investigations originating from string theory have also revealed many new examples of QFTs that do not seem to admit a Lagrangian formulation and therefore challenge the traditional Lagrangian and Hamiltonian framework of quantum theories. There is very little we can currently compute in such theories with existing methods. This fact has motivated a flurry of activity in the search for new nonperturbative approaches to QFTs. The modern conformal bootstrap and <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix bootstrap programs <xref ref-type="bibr" rid="c2 c3 c4">[2–4]</xref> are prominent examples.</p><p>For the above reasons, it is particularly interesting to develop novel methodologies that will allow us to better understand general maps between functions in various contexts. We are especially interested in situations where partial information from explicit solutions in special tractable cases can be used to uncover hidden structures and achieve generalizations toward computationally hard generic regimes. Can data-driven methods help in this direction? 
Can they produce reliable results with quantifiable error and potentially new analytic understanding?</p><p>In this paper, we would like to probe these general questions in a very specific problem that concerns the relation between the modulus and the phase of scattering amplitudes in elastic <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> scattering at fixed energies. This relation, which is an important ingredient of <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix theory, is constrained by unitarity through a nontrivial integral equation [see Eq. <xref ref-type="disp-formula" rid="d5">(5)</xref> below]. Instead of solving this equation directly, we will attempt to rediscover it by “learning” it from the data of amplitudes with finite partial wave expansions, where both the modulus and phase are straightforward to compute as functions of real phase shifts.</p><p>We will study the relationship between modulus and phase (and the implications of unitarity) using a modern supervised machine learning technique: neural operators (NOs) <xref ref-type="bibr" rid="c5 c6">[5,6]</xref>. Unlike standard neural networks that are good function approximators, neural operators are good approximators of maps between infinite-dimensional function spaces. 
Since we are seeking to learn the map between the modulus and phase of a scattering amplitude—both functions of the scattering angle—NOs present themselves as an appealing tool.</p><p>Our main goal in this context will be to explore: <list list-type="roman-lower"><list-item><label>(i)</label><p>to what extent NOs generalize knowledge from finite to infinite partial wave expansions, and</p></list-item><list-item><label>(ii)</label><p>how to quantify the reliability of the result assuming no prior knowledge of the unitarity constraint.</p></list-item></list></p><p>Toward that end, we will run a simultaneous “regression-classification” task by training the NOs on both true and false samples. Their output will contain an extra label, which will be called “fidelity index,” indicating whether the prediction should be kept as a reliable solution or get rejected. We will provide evidence that the fidelity index extracts nontrivial features of the true solutions and that its value correlates with the degree of violation of the unitarity equation.</p><p>Typically, NOs supplement other direct methods in the solution of complicated equations, commonly partial differential equations (PDEs). The above implementation of NOs in a simultaneous regression-classification task is unconventional; to the best of our knowledge similar applications have been thus far limited (for some recent studies of NOs in image classification see <xref ref-type="bibr" rid="c7 c8 c9 c10">[7–10]</xref>).</p><p>The performance of a NO—and how it learns—for fixed hyperparameters and training datasets depends on various stochastic factors that play a role during the training process and are hard to quantify. We will therefore also propose that it is useful to study the <italic>collective</italic> behavior of NOs. 
In particular, we will present specific data exhibiting the improved properties of the “mean fidelity index.” We will argue that quantities like the mean fidelity index can be useful and could play a role similar to the Martin parameter <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi></mml:math></inline-formula> [see Eq. <xref ref-type="disp-formula" rid="d9">(9)</xref>], which provides a partial characterization of the scattering amplitudes.</p><p>Setting NOs aside for a moment, another popular machine learning method that appears in the context of PDEs is physics informed neural networks (PINNs) <xref ref-type="bibr" rid="c11">[11]</xref>. In that case, neural networks are used to directly model the unknown function: the equation to be solved goes into the definition of the loss that the network tries to minimize during training. Recently, PINNs were used to directly solve for the unitarity equation, obtaining notable results <xref ref-type="bibr" rid="c12">[12]</xref>. We emphasize that our approach should be viewed as complementary with an orthogonal scope, because we are attempting to reconstruct the unitarity equation and its implications without using it directly.</p><p>The rest of this paper is organized as follows. We begin in Sec. <xref ref-type="sec" rid="s2">II</xref> with an introduction of the physics problem and a summary of the key formulas and definitions used in the main text. In Sec. <xref ref-type="sec" rid="s3">III</xref> we present the salient features of PINNs and NOs, along with useful references for the nonexpert reader. The main results of the paper appear in Sec. <xref ref-type="sec" rid="s4">IV</xref>, which focuses on amplitudes with unique phases, and Sec. <xref ref-type="sec" rid="s5">V</xref>, which discusses the subtle case of amplitudes with phase ambiguities. 
In both cases, we see that NOs can generalize nontrivially beyond their training set, learning important properties about the structure of the system. We elaborate on the efficiency, advantages, disadvantages and difficulties of the approach. We conclude in Sec. <xref ref-type="sec" rid="s6">VI</xref> with a brief summary of our main results and a discussion of interesting future prospects.</p></sec><sec id="s2"><label>II.</label><title>BACKGROUND: MODULUS AND PHASE IN ELASTIC SCATTERING</title><p>The following discussion is restricted to elastic <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> scattering. In quantum scattering processes, we measure the differential cross section <inline-formula><mml:math display="inline"><mml:mfrac><mml:mrow><mml:mi>d</mml:mi><mml:mi>σ</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi mathvariant="normal">Ω</mml:mi></mml:mrow></mml:mfrac></mml:math></inline-formula>, which is equal to the square of the modulus of the scattering amplitude <inline-formula><mml:math display="inline"><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>θ</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, <disp-formula id="d1"><mml:math display="block"><mml:mfrac><mml:mrow><mml:mi>d</mml:mi><mml:mi>σ</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi mathvariant="normal">Ω</mml:mi></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>θ</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:msup><mml:mo stretchy="false">|</mml:mo><mml:mn>2</mml:mn></mml:msup><mml:mo>.</mml:mo></mml:math><label>(1)</label></disp-formula>The scattering amplitude, which is part of the asymptotic form of the wave function in nonrelativistic quantum mechanics, is a complex number <disp-formula id="d2"><mml:math display="block"><mml:mi>f</mml:mi><mml:mo 
stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:math><label>(2)</label></disp-formula>with modulus <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and phase <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. We used <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi><mml:mo>≔</mml:mo><mml:mi>cos</mml:mi><mml:mi>θ</mml:mi></mml:math></inline-formula> to express the dependence on the scattering angle <inline-formula><mml:math display="inline"><mml:mi>θ</mml:mi></mml:math></inline-formula>. 
From the differential scattering cross section one reads off <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, but it is in principle difficult to extract the corresponding phase <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>.</p><p>Mathematically, this task is easy when the scattering amplitude admits a <italic>finite</italic> partial wave expansion <disp-formula id="d3"><mml:math display="block"><mml:mrow><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mrow><mml:mo>∑</mml:mo></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mo>ℓ</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math><label>(3)</label></disp-formula>in terms of <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> phase shifts <inline-formula><mml:math 
display="inline"><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:math></inline-formula>. Both <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> are expressed in terms of the phase shifts. In this form, unitarity plays a simple role; it dictates that the phase shifts are real. In <xref ref-type="disp-formula" rid="d3">(3)</xref>, <inline-formula><mml:math display="inline"><mml:mi>k</mml:mi></mml:math></inline-formula> is the wave number of a nonrelativistic particle scattered by some potential in quantum mechanics and the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>P</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> are Legendre polynomials.</p><p>A generic amplitude, however, admits an <italic>infinite</italic> partial wave expansion. 
At fixed energy (equivalently, fixed <inline-formula><mml:math display="inline"><mml:mi>k</mml:mi></mml:math></inline-formula>) the rescaled amplitude <inline-formula><mml:math display="inline"><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>k</mml:mi><mml:mi>f</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is an infinite superposition of partial waves <disp-formula id="d4"><mml:math display="block"><mml:mrow><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:munderover><mml:mrow><mml:mo>∑</mml:mo></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>∞</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mo>ℓ</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(4)</label></disp-formula>In that case, finding the phase <inline-formula><mml:math 
display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for a given <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is more complicated and the partial wave expansion is less useful.</p><p>Nevertheless, when formulated more generally, unitarity is a strong condition that nontrivially relates the modulus and the phase of a scattering amplitude. A standard argument<fn id="fn2"><label><sup>2</sup></label><p>See <xref ref-type="bibr" rid="c13 c14">[13,14]</xref> for a review of the argument in nonrelativistic quantum mechanics and <xref ref-type="bibr" rid="c15 c16">[15,16]</xref> for a discussion in relativistic QFT. A related discussion also appears in <xref ref-type="bibr" rid="c17 c18">[17,18]</xref>.</p></fn> shows that unitarity imposes the integral constraint <disp-formula id="d5"><mml:math display="block"><mml:mrow><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>π</mml:mi></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>B</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mi>B</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>4</mml:mn><mml:mi>π</mml:mi><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mi>cos</mml:mi><mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mi>ϕ</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo>-</mml:mo><mml:mi>ϕ</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mo stretchy="false">]</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mspace linebreak="goodbreak"/></mml:mrow></mml:math><label>(5)</label></disp-formula>where <disp-formula id="d6"><mml:math display="block"><mml:msub><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>ϕ</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mo>≡</mml:mo><mml:mi>z</mml:mi><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:msqrt><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup></mml:mrow></mml:msqrt><mml:msqrt><mml:mrow><mml:mn>1</mml:mn><mml:mo>-</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:msqrt><mml:mi>cos</mml:mi><mml:msub><mml:mi>ϕ</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>.</mml:mo></mml:math><label>(6)</label></disp-formula>For a given <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, one would like to solve this equation to determine the corresponding phase <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>.</p><p>In the existing literature, a significant amount of effort has been put into determining for which <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> there exist solutions for <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, either unique or multiple, and several associated bounds have been established. The so-called “dual bound” is derived by setting <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> in <xref ref-type="disp-formula" rid="d5">(5)</xref>. 
This special case provides a necessary condition for <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> to be the valid modulus of a scattering amplitude, <disp-formula id="d7"><mml:math display="block"><mml:mrow><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>B</mml:mi><mml:msup><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>≤</mml:mo><mml:mn>1</mml:mn><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(7)</label></disp-formula>Additional bounds on existence and uniqueness can be obtained by defining the function <disp-formula id="d8"><mml:math display="block"><mml:mrow><mml:mi>K</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo 
stretchy="false">)</mml:mo><mml:mo>≔</mml:mo><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>π</mml:mi></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>B</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow><mml:mi>B</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mn>4</mml:mn><mml:mi>π</mml:mi><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(8)</label></disp-formula>and the “Martin parameter” <disp-formula id="d9"><mml:math display="block"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≔</mml:mo><mml:munder><mml:mi>max</mml:mi><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>≤</mml:mo><mml:mi>z</mml:mi><mml:mo>≤</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>K</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>.</mml:mo></mml:math><label>(9)</label></disp-formula>For example, one can trivially show using <xref ref-type="disp-formula" rid="d5">(5)</xref> that <disp-formula id="d10"><mml:math display="block"><mml:mo stretchy="false">|</mml:mo><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo 
stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mo>≤</mml:mo><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>.</mml:mo></mml:math><label>(10)</label></disp-formula>Moreover, it can be proven that, given a modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, solutions for phases always exist when <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≤</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> <xref ref-type="bibr" rid="c19">[19]</xref> but known arguments do not preclude the existence of solutions also for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>. Polynomial (finite partial wave) amplitudes are unique if <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≤</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> <xref ref-type="bibr" rid="c19">[19]</xref>. 
For amplitudes with an infinite number of partial waves the best bound on uniqueness is currently <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>0.86</mml:mn></mml:math></inline-formula> <xref ref-type="bibr" rid="c20">[20]</xref>, but it is believed that phases should be unique up to <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, <xref ref-type="bibr" rid="c18 c19">[18,19]</xref>.</p><p>There can also be multiple (ambiguous) phases corresponding to the same modulus, which do not include the trivial ambiguity where all the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">→</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:math></inline-formula> [and, therefore, <inline-formula><mml:math display="inline"><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">→</mml:mo><mml:mo>-</mml:mo><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula> via <xref ref-type="disp-formula" rid="d4">(4)</xref>]. For elastic scattering this degeneracy is twofold <xref ref-type="bibr" rid="c21 c22">[21,22]</xref> and has been completely classified for finite partial waves with <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula>, 3 <xref ref-type="bibr" rid="c23 c24 c25">[23–25]</xref>. Phase ambiguities in <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula> amplitudes have been discussed in <xref ref-type="bibr" rid="c26">[26]</xref>. 
Twofold ambiguous solutions can also be constructed for amplitudes with infinite partial wave expansions. It is interesting to ask what is the lowest possible value of <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi></mml:math></inline-formula> for the ambiguous solutions. For example, for <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> the lowest value of <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi></mml:math></inline-formula> is 2.6. An amplitude with the lowest known value of <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≃</mml:mo><mml:mn>1.67</mml:mn></mml:math></inline-formula> was constructed recently using machine learning methods in <xref ref-type="bibr" rid="c12">[12]</xref>.</p></sec><sec id="s3"><label>III.</label><title>PINNS, NEURAL OPERATORS AND PHYSICS INFORMED NEURAL OPERATOR</title><p>We next summarize some of the high-level features of PINNs and neural operators for the nonexpert reader and highlight their main differences.</p><p>Let us assume that we want to solve a system of equations for a set of unknown functions. In many applications, this is a system of partial differential equations, a system of integro-differential equations, or a set of algebraic equations. A natural machine learning approach is to use neural networks (NNs) as universal function approximators <xref ref-type="bibr" rid="c27">[27]</xref> to model the unknown functions and set up a training process where the parameters of the NNs are optimized to satisfy the prescribed system of equations with the least possible error.<fn id="fn3"><label><sup>3</sup></label><p>This process involves the solution of a typically very high-dimensional nonlinear, nonconvex optimization problem with thousands, millions, or more, parameters. 
Stochastic gradient descent methods have proved very efficient in this context and algorithms like the adaptive momentum estimation (ADAM) <xref ref-type="bibr" rid="c28">[28]</xref> are popular choices.</p></fn> The domain of the functions is discretized on a collocation grid, and the corresponding error in the equations is evaluated and quantified in a scalar semipositive quantity, typically the mean squared error on the grid. This idea forms the basis behind PINNs <xref ref-type="bibr" rid="c11">[11]</xref> (related ideas go back to several papers from the 1990s, e.g. <xref ref-type="bibr" rid="c29 c30">[29,30]</xref>) and constitutes an “unsupervised” approach: the algorithm generates its own data and tries to solve a problem associated with the specific system of equations. When the form of the equations changes (e.g. the source function in a PDE or the functions that describe the boundary/initial conditions), the PINN needs to be optimized from scratch.</p><p>Neural operators are another data-driven approach that employs NNs. In this case, the idea is to approximate the “solution operator” that maps the input functions (e.g. source functions, boundary/initial conditions) to the output functions solving the system of equations. To achieve this goal a NN with a more complicated architecture is employed. The latter is not merely the composition of linear operations and pointwise nonlinear actions of activation functions, but also convolutions that act nondiagonally on the domain of the input functions. Early discussions of neural operators (and related universal approximation theorems) also go back to the 1990s, e.g. <xref ref-type="bibr" rid="c31">[31]</xref>. In the present work, we will be employing a modern incarnation of the neural operator concept, the so-called Fourier neural operators (FNOs), which are constructed using convolution kernels defined in Fourier space <xref ref-type="bibr" rid="c5 c6">[5,6]</xref>. 
Another approach that shares some common features with neural operators is that of deep operator networks (DeepONets), <xref ref-type="bibr" rid="c32">[32]</xref>. We will not consider DeepONets in this paper.</p><p>The NO is a “supervised” machine learning method. The training is based on a dataset of ground-truth input-output pairs that teach the algorithm to map between the input and output function spaces. In typical applications, this dataset is generated by solving the system of equations of interest through some other method. It is worth noting that, although functions are defined on a grid during this process, NOs are discretization invariant and exhibit advanced performance in zero-shot superresolution—namely, they can be trained at low-resolution samples and compute at never-before-seen high resolutions <xref ref-type="bibr" rid="c5 c6">[5,6]</xref>. Another obvious characteristic advantage of NOs is that, once trained, they can quickly find the solution for new inputs without further retraining, in contradistinction with PINNs. This is convenient if one scans over a landscape of input functions (as we will be doing later in this paper).</p><p>There is a plethora of applications of NOs to PDEs in the literature. A recent application of NOs to the time-dependent Schrödinger equation and scattering in nonrelativistic quantum mechanics appeared in <xref ref-type="bibr" rid="c33">[33]</xref>.</p><p>Recently, the authors of Ref. <xref ref-type="bibr" rid="c12">[12]</xref> employed the PINN approach to study the relation between the modulus and phase of the scattering amplitude in elastic <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> scattering, solving the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref>. 
They produced remarkable results, including a new solution with ambiguous phases that has the lowest known Martin parameter <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≃</mml:mo><mml:mn>1.67</mml:mn></mml:math></inline-formula>. This result improved the relevant bound for the first time in 50 years.</p><p>In this paper we do not want to simply repeat the analysis of <xref ref-type="bibr" rid="c12">[12]</xref> using NOs as an alternative machine learning method. For the reasons outlined in the Introduction, our main motivation is to explore to what extent we can learn the solutions together with the equation we are trying to solve. In the present work, that means learning the modulus/phase relation in Sec. <xref ref-type="sec" rid="s2">II</xref> <italic>without</italic> using the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref>. In this quest, we will be using the NOs in a rather unorthodox way. The NO will be trained on both true and false samples in a class of input functions where <xref ref-type="disp-formula" rid="d5">(5)</xref> is trivially satisfied and will be asked to uncover nontrivial structure underlying <xref ref-type="disp-formula" rid="d5">(5)</xref> outside this class rating its own performance and the quality of its predictions. We hope that this application will inspire other similar explorations in even more complicated problems, where the underlying equations are missing.</p><p>As a final comment, we would like to point out that it is also possible to combine the benefits of PINNs and NOs in a hybrid construction that trains NOs using the loss of the underlying equation like a PINN. This approach is called a physics informed neural operator (PINO) and has been explored in the context of PDEs in <xref ref-type="bibr" rid="c34">[34]</xref>. 
It would be interesting to explore potential improvements of the results in this work and <xref ref-type="bibr" rid="c12">[12]</xref> using PINOs.<fn id="fn4"><label><sup>4</sup></label><p>It would also be interesting to explore further related applications in the context of the <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix bootstrap, see, e.g. <xref ref-type="bibr" rid="c35">[35]</xref>.</p></fn></p></sec><sec id="s4"><label>IV.</label><title>UNIQUE PHASES</title><p>In this section, we train a NO on a set of random finite partial wave expansions with <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> to learn the mapping between the input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the output <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, the sine of the corresponding amplitude phase. We assume that the relation is one-to-one and set up the training accordingly. Once trained, we test how well the NO predicts the phase of unseen amplitudes, e.g. amplitudes with an infinite partial wave expansion. We also explore ways to detect whether or not the prediction is reliable.</p><sec id="s4a"><label>A.</label><title>Neural operator setup I: Training on samples of valid solutions</title><p>We now present our first attempt at NO training. We begin by listing the hyperparameters used and detail the choice of training and test datasets, before testing for generalizations of the trained model. 
All the computations in this work were performed on NVIDIA A100 GPUs with 40 GB RAM.</p><sec id="s4a1"><label>1.</label><title>Hyperparameters and training</title><p><italic>Hyperparameters.</italic> Using the Fourier neural operator implementation of <xref ref-type="bibr" rid="c6">[6]</xref>, for which a well-explained documentation can be found on GitHub, we set up a 1D tensorized Fourier neural operator (TFNO) implemented in <sc>p</sc>y<sc>t</sc>orch with the following hyperparameters: <disp-formula id="und1"><mml:math display="block"><mml:mrow><mml:mtext>number of</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>Fourier modes</mml:mtext><mml:mtext> </mml:mtext><mml:mo id="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">n</mml:mi><mml:mtext>_</mml:mtext><mml:mi mathvariant="monospace">modes</mml:mi><mml:mo>=</mml:mo><mml:mn>50</mml:mn><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:mtext>number of hidden channels</mml:mtext><mml:mtext> </mml:mtext><mml:mo indentalign="id" indenttarget="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">hidden</mml:mi><mml:mtext>_</mml:mtext><mml:mi mathvariant="monospace">channels</mml:mi><mml:mo>=</mml:mo><mml:mn>64</mml:mn><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:mtext>number of projection channels</mml:mtext><mml:mtext> </mml:mtext><mml:mo indentalign="id" indenttarget="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">projection</mml:mi><mml:mtext>_</mml:mtext><mml:mi mathvariant="monospace">channels</mml:mi><mml:mo>=</mml:mo><mml:mn>512</mml:mn><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:mtext>number of layers</mml:mtext><mml:mtext> </mml:mtext><mml:mo indentalign="id" indenttarget="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">n</mml:mi><mml:mtext>_</mml:mtext><mml:mi 
mathvariant="monospace">layers</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:mtext>type of factorization</mml:mtext><mml:mtext> </mml:mtext><mml:mo indentalign="id" indenttarget="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">factorization</mml:mi><mml:mo>=</mml:mo><mml:mi>“</mml:mi><mml:mi mathvariant="monospace">tucker</mml:mi><mml:mi>”</mml:mi><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:mrow><mml:mi>rank</mml:mi><mml:mtext> </mml:mtext></mml:mrow><mml:mo indentalign="id" indenttarget="und1a1">:</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mi mathvariant="monospace">rank</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>This is a model with 76,849 parameters that are tuned during the training to produce an optimal NO. The training optimization was performed using ADAM <xref ref-type="bibr" rid="c36">[36]</xref> with learning rate <inline-formula><mml:math display="inline"><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, weight decay <inline-formula><mml:math display="inline"><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>5</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> and batch size 256. Varying the above hyperparameters did not result in significant variations of the results.</p><p><italic>Training</italic>. The training dataset is prepared in the following manner. 
We generate random samples of amplitudes with finite partial wave expansions <disp-formula id="d11"><mml:math display="block"><mml:mrow><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:munderover><mml:mrow><mml:mo>∑</mml:mo></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:mo>ℓ</mml:mo><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi><mml:msub><mml:mrow><mml:mi>δ</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:msup><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mo>ℓ</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math><label>(11)</label></disp-formula>sampling the random phase shifts <inline-formula><mml:math display="inline"><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:math></inline-formula> from a uniform distribution. 100,000 samples are collected for <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula>, and 3, separately, providing a total of 300,000 amplitudes. 
For each of these amplitudes we read off their modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the sine of their phase <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. Afterward, we discretize <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mi>cos</mml:mi><mml:mi>θ</mml:mi><mml:mo>∈</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> on a uniform grid of 100 points<fn id="fn5"><label><sup>5</sup></label><p>A remarkable feature of NOs is their capacity to efficiently implement zero-shot superresolution <xref ref-type="bibr" rid="c37">[37]</xref>. In the context of quantum <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> scattering, this gives us the ability to train on a grid of, say, 100 points and then easily make accurate predictions at higher resolutions. We did not see the need to go beyond the 100-point resolution in this problem, but it is good to keep in mind that this possibility exists.</p></fn> to produce 300,000 100-dimensional vectors <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi>B</mml:mi><mml:mo stretchy="false">→</mml:mo></mml:mover></mml:math></inline-formula> and 300,000 100-dimensional vectors <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mrow><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:mrow><mml:mo stretchy="true">→</mml:mo></mml:mover></mml:math></inline-formula>. 
The collection of <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi>B</mml:mi><mml:mo stretchy="false">→</mml:mo></mml:mover></mml:math></inline-formula> vectors is converted to a <sc>p</sc>y<sc>t</sc>orch tensor that forms the input of the NO during training. Similarly, the collection of <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mrow><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:mrow><mml:mo stretchy="true">→</mml:mo></mml:mover></mml:math></inline-formula> vectors is converted to a <sc>p</sc>y<sc>t</sc>orch tensor that forms the ground-truth output of the NO. We train on 98% of the samples (namely, 294,000 samples) and test on 2% (namely 6000 samples, evenly distributed across <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula>). The results reported below are based on a single training run of 6500 epochs.</p><p>We emphasize that once the trained NO has been obtained, it can be used to make very quick predictions for any input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, in stark contrast with the PINN approach, where retraining from scratch is needed for every new input.</p></sec><sec id="s4a2"><label>2.</label><title>Tests against known results</title><p>Once the NO has trained on known samples, we investigate how well it generalizes both on the same class of data (training/test dataset), as well as on different classes of never-before-seen data. 
Predicting the phases of amplitudes in the latter case would indicate that the NO is able to learn the unitarity relation <xref ref-type="disp-formula" rid="d5">(5)</xref> and effectively solve it without having direct access to it.</p><p><italic>Tests within the training-test dataset</italic>. For starters, we can ask about the quality of predictions inside the training/test dataset. In Fig. <xref ref-type="fig" rid="f1">1</xref> we plot the ground truth (blue) and predictions (orange) of the trained NO for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> of three randomly chosen samples from the test dataset with <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula>, respectively. The plots of the ground truth and prediction are visually indistinguishable, indicating that the NO has trained well. To get a sense of the numerical size of the error in the plots of Fig. <xref ref-type="fig" rid="f1">1</xref>, for <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> the average relative error between the ground truth and prediction across the whole <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> grid is 0.4%. For <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> it is 1.1% and for <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> it is 0.9%. These numbers are typical in the test dataset. 
The percentage of samples that exhibit average relative error above 10% is 5.2%.</p><fig id="f1"><object-id>1</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f1</object-id><label>FIG. 1.</label><caption><p>Plots of the ground truth <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> (blue color) and FNO-predicted <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> (orange color) for three randomly chosen samples of amplitudes within the 6000 test dataset. From top to bottom we list plots for amplitudes with finite partial wave expansions and <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula>, respectively.</p></caption><graphic xlink:href="e045020_1.eps"/></fig><p><italic>A first sample of tests on moduli with infinite partial wave expansions</italic>. A more interesting question concerns the extent to which the NO can generalize outside the training dataset. The first case we would like to discuss here concerns amplitudes with an <italic>infinite</italic> partial wave expansion. For concreteness, we will consider two examples of linear and quadratic moduli that were analyzed also in Ref. <xref ref-type="bibr" rid="c12">[12]</xref>. 
Later we will scan more extensively over the predictions for amplitudes with linear, quadratic, as well as cubic moduli.</p><p>The first example concerns amplitudes with linear modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mi>b</mml:mi></mml:math></inline-formula> (and <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo>&gt;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>a</mml:mi><mml:mo stretchy="false">|</mml:mo></mml:math></inline-formula> for positivity). It is straightforward to check that these amplitudes do not have a finite partial wave expansion.<fn id="fn6"><label><sup>6</sup></label><p>For a detailed discussion see <xref ref-type="bibr" rid="c12">[12]</xref>.</p></fn> In the top left plot of Fig. <xref ref-type="fig" rid="f2">2</xref> we present the prediction of the NO for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> when <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>10</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mn>4</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> against a numerical solution of the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref> obtained with the use of an iteration scheme. This particular amplitude has Martin parameter <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>0.522</mml:mn></mml:math></inline-formula> and the iteration scheme converges very quickly. 
The NO prediction is denoted by orange, while the solution of the unitarity equation by blue. The two solutions are visibly close. On the top right plot of Fig. <xref ref-type="fig" rid="f2">2</xref> we also present (point-by-point on our <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> grid) the relative difference <inline-formula><mml:math display="inline"><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> between the NO prediction <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:msub><mml:mi>ϕ</mml:mi><mml:mrow><mml:mi>NO</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the solution of the unitarity equation <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula>, <disp-formula id="d12"><mml:math display="block"><mml:mrow><mml:mi>r</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>≔</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mfrac><mml:mrow><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">-</mml:mo><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mi>NO</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:mrow><mml:mrow><mml:mi>sin</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>5</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>|</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(12)</label></disp-formula>For most points the relative difference is of the order of <inline-formula><mml:math display="inline"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>.</p><fig id="f2"><object-id>2</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f2</object-id><label>FIG. 2.</label><caption><p>The top two plots display the prediction of the trained NO for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> against the exact result for input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>10</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mn>4</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. On the left are the actual functions, while on the right the pointwise relative difference. 
The bottom two plots display the corresponding data for input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. Both cases refer to amplitudes with an infinite partial wave expansion.</p></caption><graphic xlink:href="e045020_2.eps"/></fig><p>The second example refers to the quadratic modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, which was also discussed in Ref. <xref ref-type="bibr" rid="c12">[12]</xref>. This amplitude has Martin parameter <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>0.867</mml:mn></mml:math></inline-formula> and can once again be determined numerically by solving the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref> with a simple iteration scheme. In the bottom left plot of Fig. <xref ref-type="fig" rid="f2">2</xref> we present in orange and blue, respectively, the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:math></inline-formula> for the NO prediction and the solution of the unitarity equation. Once again, the two plots are visibly close. In the bottom right plot of Fig. 
<xref ref-type="fig" rid="f2">2</xref> we also present the relative difference, which is now of the order of <inline-formula><mml:math display="inline"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for most points. It increases near <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, where the prediction in the depicted run was less accurate.</p><p><italic>Scans on linear, quadratic and cubic moduli</italic>. We can test the quality of the NO predictions on moduli with infinite partial wave expansions more extensively, by performing a scan over a wide grid of linear, quadratic and cubic moduli. To quantify the quality of the predictions we compute the loss in the unitarity condition <xref ref-type="disp-formula" rid="d5">(5)</xref>, <disp-formula id="d13"><mml:math display="block"><mml:mrow><mml:mi mathvariant="script">L</mml:mi><mml:mo id="d13a1">≔</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>c</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:munder><mml:mrow><mml:mo>∑</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:munder><mml:msup><mml:mrow other="silent"><mml:mo>(</mml:mo><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow other="silent"><mml:mn>4</mml:mn><mml:mi>π</mml:mi><mml:mi>B</mml:mi><mml:mo 
stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msubsup><mml:mrow><mml:mo>∫</mml:mo></mml:mrow><mml:mrow><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>π</mml:mi></mml:mrow></mml:msubsup><mml:mi>d</mml:mi><mml:msub><mml:mrow><mml:mi>ϕ</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mspace linebreak="goodbreak"/><mml:mo indentalign="id" indentshift="1em" indenttarget="d13a1">⁢</mml:mo><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mi>cos</mml:mi><mml:mo minsize="2ex" stretchy="true">(</mml:mo><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo>-</mml:mo><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mrow><mml:mi>z</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">)</mml:mo><mml:mo minsize="2ex" stretchy="true">)</mml:mo><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>,</mml:mo></mml:mrow></mml:math><label>(13)</label></disp-formula>where <inline-formula><mml:math 
display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is computed as in <xref ref-type="disp-formula" rid="d6">(6)</xref> with <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula>. The sum is over the points <inline-formula><mml:math display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:math></inline-formula> of the collocation <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> grid and the average is obtained by dividing with the number <inline-formula><mml:math display="inline"><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:math></inline-formula> of collocation points. In our runs <inline-formula><mml:math display="inline"><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>100</mml:mn></mml:math></inline-formula>. The integrals in <xref ref-type="disp-formula" rid="d13">(13)</xref> were computed numerically using the trapezoidal rule.<fn id="fn7"><label><sup>7</sup></label><p>The results presented in this section used the fixed grid of 100 collocation points in the NO training in order to apply the trapezoidal rule. It is straightforward to achieve higher numerical accuracy in the numerical computation of the integrals in <xref ref-type="disp-formula" rid="d13">(13)</xref> using higher resolution grids with NO zero-shot superresolution.</p></fn> Figure <xref ref-type="fig" rid="f3">3</xref> displays the heat maps of the values of <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub><mml:mi mathvariant="script">L</mml:mi></mml:math></inline-formula> for the NO predictions on a grid of linear, quadratic and cubic moduli. 
Let us comment on each of these plots separately.</p><fig id="f3"><object-id>3</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f3</object-id><label>FIG. 3.</label><caption><p>Heat maps for the log base 10 loss of the NO prediction with respect to the unitarity condition <xref ref-type="disp-formula" rid="d5">(5)</xref>. The top left plot refers to linear moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mi>b</mml:mi></mml:math></inline-formula>, the top right plot to quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula> and the bottom plot to cubic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>3</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula>. Analogous results for the top two plots were obtained with the use of PINNs in Ref. <xref ref-type="bibr" rid="c12">[12]</xref> (see Figs. 3 and 5 of that paper). 
The thin black curves are the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> boundaries while the thick gray curves express the dual bounds.</p></caption><graphic xlink:href="e045020_3.eps"/></fig><p>For the linear moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mi>b</mml:mi></mml:math></inline-formula> we considered a grid of <inline-formula><mml:math display="inline"><mml:mn>180</mml:mn><mml:mo>×</mml:mo><mml:mn>150</mml:mn></mml:math></inline-formula> points on the <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> plane for <inline-formula><mml:math display="inline"><mml:mi>a</mml:mi><mml:mo>∈</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mo>-</mml:mo><mml:mn>0.5</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo>∈</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>b</mml:mi><mml:mo>&gt;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>a</mml:mi><mml:mo stretchy="false">|</mml:mo></mml:math></inline-formula>. The heat map of the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub><mml:mi mathvariant="script">L</mml:mi></mml:math></inline-formula> values on this grid appears on the top left plot of Fig. <xref ref-type="fig" rid="f3">3</xref>. The corresponding heat map in Ref. 
<xref ref-type="bibr" rid="c12">[12]</xref> appears in Fig. <xref ref-type="fig" rid="f3">3</xref> of that paper. Reference <xref ref-type="bibr" rid="c12">[12]</xref> computed on a grid of <inline-formula><mml:math display="inline"><mml:mn>75</mml:mn><mml:mo>×</mml:mo><mml:mn>60</mml:mn></mml:math></inline-formula> points in the <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> plane, <italic>retraining</italic> the neural networks for 2K epochs to obtain each point. Instead, we are <italic>evaluating</italic> the already trained NO at each point producing a heat map on a finer grid within approximately 20 sec.</p><p>In the top left plot of Fig. <xref ref-type="fig" rid="f3">3</xref> we observe a distinct, blue-colored, low-loss region inside the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> contour, precisely like the one detected by PINNs for linear moduli in <xref ref-type="bibr" rid="c12">[12]</xref>. The main difference with the PINN result is that its lowest <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub></mml:math></inline-formula> losses are in the vicinity of <inline-formula><mml:math display="inline"><mml:mo>-</mml:mo><mml:mn>8</mml:mn></mml:math></inline-formula>, whereas our corresponding values are in the vicinity of <inline-formula><mml:math display="inline"><mml:mo>-</mml:mo><mml:mn>5</mml:mn></mml:math></inline-formula>. That amounts to a difference in loss between the two methods at the level of 3 orders of magnitude. 
This is expected, since the PINN performs a dedicated optimization search for each input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> explicitly using the unitarity condition <xref ref-type="disp-formula" rid="d5">(5)</xref>, whereas the NO trains on a completely different class of inputs to produce a prediction outside its training dataset without using the unitarity condition. In that sense, the NO results in Fig. <xref ref-type="fig" rid="f3">3</xref> are impressive and provide a distinct indication that the NO has been able to generalize well within the infinite partial wave amplitudes with linear moduli. Of course, by simply looking at the heat map of Fig. <xref ref-type="fig" rid="f3">3</xref> one cannot really deduce where one should put the cutoff that separates the predictions that are consistent with unitarity from the ones that are inconsistent with unitarity. The same issue also exists within the PINN approach, but there it is slightly mitigated by the lower losses of the corresponding results. 
We will have more to say about how to address this difficulty in the next subsection.</p><p>For the quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula> we also considered a grid of <inline-formula><mml:math display="inline"><mml:mn>180</mml:mn><mml:mo>×</mml:mo><mml:mn>150</mml:mn></mml:math></inline-formula> points on the <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> plane for <inline-formula><mml:math display="inline"><mml:mi>c</mml:mi><mml:mo>∈</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mo>-</mml:mo><mml:mn>0.5</mml:mn><mml:mo>,</mml:mo><mml:mn>5.5</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>, <inline-formula><mml:math display="inline"><mml:mi>d</mml:mi><mml:mo>∈</mml:mo><mml:mo stretchy="false">[</mml:mo><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>1.5</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>c</mml:mi><mml:mo>&gt;</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">|</mml:mo></mml:math></inline-formula>. The <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub><mml:mi mathvariant="script">L</mml:mi></mml:math></inline-formula> heat map on this grid is depicted on the top right plot of Fig. <xref ref-type="fig" rid="f3">3</xref>. The corresponding heat map from Ref. <xref ref-type="bibr" rid="c12">[12]</xref> appears in Fig. <xref ref-type="fig" rid="f5">5</xref> of that paper. 
Once again, we observe the formation of a low-loss region inside the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> contour, which is comparable with the result in Fig. 5 of Ref. <xref ref-type="bibr" rid="c12">[12]</xref>, suggesting that the NO has been able to generalize to this class of amplitudes as well. Similar to the linear case, the NO losses are higher by roughly 3 orders of magnitude compared to the PINN losses of <xref ref-type="bibr" rid="c12">[12]</xref>.</p><p>Finally, in the bottom plot of Fig. <xref ref-type="fig" rid="f3">3</xref> we present the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub></mml:math></inline-formula> unitarity loss for cubic moduli of the form <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>3</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula>. Such amplitudes were not discussed in Ref. <xref ref-type="bibr" rid="c12">[12]</xref>. The resulting heat map is comparable to the linear-moduli heat map, exhibiting a distinct low-loss region inside the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> contour, as expected.</p><p>To summarize, in all three cases of infinite partial wave amplitudes analyzed in this subsection, the picture that emerges is impressively consistent with expectations from the analysis of the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref>, suggesting that the NO has learned nontrivial features of that equation without having access to it. 
The results also exhibit some of the weaknesses of the approach: <list list-type="alpha-lower"><list-item><label>(a)</label><p>The lowest losses are a few orders of magnitude higher than those produced by PINNs. That makes it harder to detect, without prior knowledge, the boundary between valid predictions consistent with unitarity and invalid predictions inconsistent with unitarity.</p></list-item><list-item><label>(b)</label><p>The low-loss regions (in blue) are not perfectly aligned with the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> and dual bounds. For example, there are small blue regions violating the dual bounds.</p></list-item><list-item><label>(c)</label><p>The quadratic-modulus NO heat map in Fig. <xref ref-type="fig" rid="f3">3</xref> does not appear to detect the additional solutions appearing in Fig. 5 of <xref ref-type="bibr" rid="c12">[12]</xref> (e.g. the two small islands of <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> finite partial wave solutions).</p></list-item><list-item><label>(d)</label><p>To demonstrate nontrivial learning in the above cases, we had to use the unitarity relation <xref ref-type="disp-formula" rid="d5">(5)</xref>. Without explicit knowledge of that equation the heat maps in Fig. <xref ref-type="fig" rid="f3">3</xref> would not have been possible. In addition, it is unclear to what degree of generality the NO has been able to learn the unitarity equation and whether it can make equally accurate predictions in arbitrary classes of infinite partial wave amplitudes.</p></list-item></list>We will return to these issues in the next subsection.</p><p><italic>Tests on higher-</italic><inline-formula><mml:math display="inline"><mml:mi>L</mml:mi></mml:math></inline-formula> <italic>finite partial wave amplitudes</italic>. 
Another class of amplitudes outside the training-test dataset are the finite partial wave amplitudes with values of <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula>. Exploring the quality of the NO predictions in this class shows that already at <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula> the NO fails to make any accurate predictions. This is, for example, apparent in the predictions presented at the bottom plot of Fig. <xref ref-type="fig" rid="f6">6</xref>, which depicts as a thick gray curve the exact result and as blue and orange dots the predictions of two separately trained NOs. The corresponding input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> appears on the top left plot of Fig. <xref ref-type="fig" rid="f6">6</xref>.</p><p>This case demonstrates that with the above-mentioned training on <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> amplitudes the NO cannot fully reconstruct the unitarity equation, which would allow for valid predictions with arbitrary input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
It has been able to learn nontrivial elements of the unitarity constraints, but not all the information that these constraints entail.</p></sec></sec><sec id="s4b"><label>B.</label><title>Neural operator setup II: Learning false predictions</title><p>The above observations raise the following related questions: <list list-type="alpha-lower"><list-item><label>(a)</label><p>Can NOs learn to rate the quality of their predictions producing reliable results without any reference to the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref>?</p></list-item><list-item><label>(b)</label><p>Can NOs distinguish between moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> that are allowed by unitarity and moduli that are not?</p></list-item><list-item><label>(c)</label><p>Can NOs uncover quantifiable elements of the unitarity equation without having access to it?</p></list-item></list>In this subsection we want to focus exclusively on results that can be obtained without any use of the unitarity equation. This immediately removes PINNs as a viable methodology. In general, asking whether we can obtain any results without the underlying equation is interesting, because there is a plethora of problems in physics and mathematics where knowledge of the underlying structure is missing.</p><p>In the setup of Sec. <xref ref-type="sec" rid="s4a">IV A</xref>, the NOs are designed to make a prediction for arbitrary input modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
Without prior knowledge of the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref> it is impossible to deduce whether the solution exists, whether a prediction is valid, or to rate the quality of a prediction for a solution that exists. To address this difficulty, we propose setting up a slight variant of the NO of Sec. <xref ref-type="sec" rid="s4a">IV A</xref>, where the output has two components: the predicted <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and a classifying label that we call fidelity index <inline-formula><mml:math display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>, which contains information about the validity of the prediction. Accordingly, we now train the NO on two types of <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mover accent="true"><mml:mi>B</mml:mi><mml:mo stretchy="false">→</mml:mo></mml:mover><mml:mo>,</mml:mo><mml:mover accent="true"><mml:mrow><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:mrow><mml:mo stretchy="true">→</mml:mo></mml:mover><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> samples: the first contains the moduli and phases of valid finite partial wave amplitudes and the second false moduli and phases that do not correspond to any amplitude. This setup should allow the NO to learn what it means to make a right prediction.</p><sec id="s4b1"><label>1.</label><title>Hyperparameters and training</title><p><italic>Hyperparameters</italic>. The results presented in this section were obtained with a 1D TFNO that has the same neural network and optimization hyperparameters as the model in Sec. <xref ref-type="sec" rid="s4a1">IV A 1</xref>. 
However, in this case a different tensorization approach yields a larger model with a total number of 874,241 parameters (an order of magnitude larger than the one in the previous model of Sec. <xref ref-type="sec" rid="s4a1">IV A 1</xref>).</p><p><italic>Training</italic>. We are using the same <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> grid as in Sec. <xref ref-type="sec" rid="s4a1">IV A 1</xref> with 100 collocation points. The output vector of the NO is therefore 101 dimensional, including an extra element <inline-formula><mml:math display="inline"><mml:msub><mml:mi>v</mml:mi><mml:mn>101</mml:mn></mml:msub></mml:math></inline-formula> characterizing the validity of the output. In our runs we chose to train by assigning the value 10 to valid input-output pairs and <inline-formula><mml:math display="inline"><mml:mo>-</mml:mo><mml:mn>10</mml:mn></mml:math></inline-formula> to false pairs. The fidelity index was defined as <inline-formula><mml:math display="inline"><mml:mi mathvariant="script">F</mml:mi><mml:mo>≔</mml:mo><mml:mfrac><mml:mrow><mml:mn>10</mml:mn><mml:mo stretchy="false">+</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mn>101</mml:mn></mml:msub></mml:mrow><mml:mn>20</mml:mn></mml:mfrac></mml:math></inline-formula>, which assigns 1 to valid pairs and 0 to invalid ones.</p><p>We explored the results of training for a variety of datasets with varying fractions of true and false inputs/outputs. As one would expect, we observed that the quality of the classification output decreased when the fraction of false pairs was reduced. Here, we report results for a training-test dataset of 400,000 samples with the following composition: 75,000 true pairs for each of the <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> amplitudes and 175,000 false pairs. 
This yields a 43.75% fraction of false samples. The inputs and outputs of the false samples were generated randomly from two different groups of <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> amplitudes. We reserved 1200 samples for testing and the samples were randomly mixed to put the true and false pairs in random order. With these specifications, we trained 56 independent NOs for 1500 epochs.</p></sec><sec id="s4b2"><label>2.</label><title>Tests and observations</title><p><italic>Tests within the training-test dataset</italic>. The accuracy of the fidelity-index prediction can be probed by computing the difference <disp-formula id="d14"><mml:math display="block"><mml:mrow><mml:mi mathvariant="normal">Δ</mml:mi><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>≔</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mi>pred</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mi>ground</mml:mi><mml:mtext> </mml:mtext><mml:mi>truth</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo stretchy="false">|</mml:mo><mml:mspace linebreak="goodbreak"/></mml:mrow></mml:math><label>(14)</label></disp-formula>between the predicted fidelity index and its ground truth, for each sample in the test dataset of 1200 samples and separately for each of the above 56 trained NOs. 
Assuming that a prediction is considered correct when <disp-formula id="d15"><mml:math display="block"><mml:mi mathvariant="normal">Δ</mml:mi><mml:mi mathvariant="script">F</mml:mi><mml:mo>&lt;</mml:mo><mml:mi>C</mml:mi></mml:math><label>(15)</label></disp-formula>for some arbitrarily chosen <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, we can go through the samples and register the number of times the inequality <xref ref-type="disp-formula" rid="d15">(15)</xref> is satisfied. This produces a success ratio <inline-formula><mml:math display="inline"><mml:msub><mml:mi>S</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for the <inline-formula><mml:math display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula>th NO. We can further average this success ratio over the NOs; we call the corresponding quantity <inline-formula><mml:math display="inline"><mml:msub><mml:mover accent="true"><mml:mi>S</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
For <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mn>0.2</mml:mn></mml:math></inline-formula> and 0.3 we find <disp-formula id="d16"><mml:math display="block"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.2</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mn>73.94</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="2em"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.3</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mn>75.34</mml:mn><mml:mo>%</mml:mo><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(16)</label></disp-formula>The values <inline-formula><mml:math display="inline"><mml:msubsup><mml:mi>S</mml:mi><mml:mi mathvariant="script">F</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mn>0.2</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:msubsup><mml:mi>S</mml:mi><mml:mi mathvariant="script">F</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msubsup><mml:mo stretchy="false">(</mml:mo><mml:mn>0.3</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for the individual NOs are very close to the above averages. In other words, there is little variation between the different NOs in this datum. 
This suggests that (with the above cutoffs for <inline-formula><mml:math display="inline"><mml:mi mathvariant="normal">Δ</mml:mi><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>) the fidelity index makes the right classification roughly 75% of the time, which is an encouraging sign of classification capacity but not an impressively high percentage.</p><p>We can also rephrase the above test in terms of a mean fidelity index <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, which is defined for each sample as the average over the corresponding fidelity indices of all trained NOs. We can then define the difference <inline-formula><mml:math display="inline"><mml:mi mathvariant="normal">Δ</mml:mi><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> as in <xref ref-type="disp-formula" rid="d14">(14)</xref> by replacing <inline-formula><mml:math display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> with <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula>, placing a cutoff as in <xref ref-type="disp-formula" rid="d15">(15)</xref> and computing the average <inline-formula><mml:math display="inline"><mml:msub><mml:mover accent="true"><mml:mi>S</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo 
stretchy="false">)</mml:mo></mml:math></inline-formula> over the samples. For this quantity we find <disp-formula id="d17"><mml:math display="block"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.2</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mn>67.42</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="2em"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>S</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.3</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mn>73</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math><label>(17)</label></disp-formula>which is comparable to the previous result. We conclude that the average of the fidelity index over the NOs did not improve the classification capacity in this context.</p><p>These observations provide useful information about the performance of the NO as a classifier, but do not tell the whole story. In particular, we would now like to argue that the above tests do not really address some important aspects of the classification performance. Indeed, when one uses a NO to make a prediction for a never-before-seen modulus, it is very useful to know whether a predicted phase truly exists and can be considered correct with confidence, given an appropriately high fidelity index. 
Everything outside a small range of high fidelity values near 1 can be considered either as plausibly false for an existing phase or false because a phase does not exist. This viewpoint rephrases the way we should measure the success ratio in the test dataset.</p><p>Accordingly, we can now perform the following test. For each individually trained NO, we scan through the test dataset and count how many times the NO falsely affirms that the prediction is correct. The criterion for a prediction to be declared correct is <disp-formula id="d18"><mml:math display="block"><mml:mo stretchy="false">|</mml:mo><mml:msub><mml:mi mathvariant="script">F</mml:mi><mml:mrow><mml:mi>pred</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">|</mml:mo><mml:mo>&lt;</mml:mo><mml:mi>C</mml:mi><mml:mo>.</mml:mo></mml:math><label>(18)</label></disp-formula>As we scan through the samples we count the cases where this inequality is satisfied and the ground-truth fidelity index vanishes (namely, the sample is false). That gives a percentage<fn id="fn8"><label><sup>8</sup></label><p>We define the ratio that gives this percentage as the number of false predictions satisfying <xref ref-type="disp-formula" rid="d18">(18)</xref> divided by the total number of predictions satisfying <xref ref-type="disp-formula" rid="d18">(18)</xref>.</p></fn> of failure <inline-formula><mml:math display="inline"><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for the <inline-formula><mml:math display="inline"><mml:mi>i</mml:mi></mml:math></inline-formula>th NO. 
We want to examine if we can choose a small enough <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi></mml:math></inline-formula> in <xref ref-type="disp-formula" rid="d18">(18)</xref> that yields high confidence in true predictions [that is, small <inline-formula><mml:math display="inline"><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>], but we also want to check how many true cases we missed with this criterion. We can also compute the average over the NOs <disp-formula id="d19"><mml:math display="block"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>ops</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:munder><mml:mrow><mml:mo>∑</mml:mo></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munder><mml:msup><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo></mml:mrow></mml:math><label>(19)</label></disp-formula>where <inline-formula><mml:math display="inline"><mml:mrow><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>ops</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>56</mml:mn></mml:mrow></mml:math></inline-formula> is the number of NOs. 
The label <inline-formula><mml:math display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula> in <inline-formula><mml:math display="inline"><mml:msub><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> is there to remind us that we are using the fidelity index of the individual NOs to evaluate the criterion <xref ref-type="disp-formula" rid="d18">(18)</xref> (this will change in a moment). As above, we did not observe significant variation in <inline-formula><mml:math display="inline"><mml:msup><mml:mi>f</mml:mi><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> among different NOs. 
Therefore, we quote here the values of <inline-formula><mml:math display="inline"><mml:msub><mml:mover accent="true"><mml:mi>f</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mi mathvariant="script">F</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn><mml:mo>,</mml:mo><mml:mn>0.02</mml:mn><mml:mo>,</mml:mo><mml:mn>0.05</mml:mn><mml:mo>,</mml:mo><mml:mn>0.1</mml:mn><mml:mo>,</mml:mo><mml:mn>0.2</mml:mn></mml:mrow></mml:math></inline-formula> along with the fraction of correct predictions of true samples over the total number of true samples <disp-formula id="d20"><mml:math display="block"><mml:mrow><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.01</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo id="d20a1">=</mml:mo><mml:mn>4.08</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>66.5</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.02</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" 
indenttarget="d20a1">=</mml:mo><mml:mn>5.37</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>67.3</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.05</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d20a1">=</mml:mo><mml:mn>8.00</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>68.3</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.10</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d20a1">=</mml:mo><mml:mn>9.99</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> 
</mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>69.2</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.20</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d20a1">=</mml:mo><mml:mn>11.71</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>69.8</mml:mn><mml:mo>%</mml:mo><mml:mo>.</mml:mo><mml:mspace linebreak="goodbreak"/><mml:malignmark/></mml:mrow></mml:math><label>(20)</label></disp-formula></p><p>We notice that the predictions of a true solution are wrong only 4.08% of the time when the fidelity index is inside the interval [0.99, 1.01]. This implies relatively high confidence in such predictions. We also notice that this criterion captures 66.5% of the total number of true samples in the test dataset. As we increase <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi></mml:math></inline-formula> (and with it the corresponding range of accepted fidelity indices) the fraction of wrong predictions increases and our confidence goes down, but the fraction of correct true predictions saturates. 
This implies that a small value of <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi></mml:math></inline-formula> at the level of 0.01 is a preferable choice.</p><p>It is also interesting to reevaluate these numbers using the mean fidelity index <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula>. In that case, we are first averaging the fidelity index over the trained NOs for a given sample to produce the corresponding mean fidelity index <inline-formula><mml:math display="inline"><mml:msub><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover><mml:mrow><mml:mi>pred</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mtext>sample</mml:mtext><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, then we use it to impose a criterion like <xref ref-type="disp-formula" rid="d18">(18)</xref> and accordingly count which of the allowed samples are false predictions. 
This procedure yields a percentage of failure <inline-formula><mml:math display="inline"><mml:msub><mml:mi>f</mml:mi><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>C</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for the “mean NO” and the analog of <xref ref-type="disp-formula" rid="d20">(20)</xref> is <disp-formula id="d21"><mml:math display="block"><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.01</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo id="d21a1">=</mml:mo><mml:mn>1.42</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>61.6</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.02</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d21a1">=</mml:mo><mml:mn>1.56</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> 
</mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>65.2</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.05</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d21a1">=</mml:mo><mml:mn>1.97</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>66.4</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.10</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d21a1">=</mml:mo><mml:mn>2.58</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>67.0</mml:mn><mml:mo>%</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mover accent="true"><mml:mrow><mml:mi 
mathvariant="script">F</mml:mi></mml:mrow><mml:mrow><mml:mo stretchy="false">¯</mml:mo></mml:mrow></mml:mover></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mn>0.20</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo indentalign="id" indenttarget="d21a1">=</mml:mo><mml:mn>4.63</mml:mn><mml:mo>%</mml:mo><mml:mo>,</mml:mo><mml:mspace depth="0.0ex" height="0.0ex" width="1em"/><mml:mfrac><mml:mrow><mml:mtext>correct true predictions</mml:mtext></mml:mrow><mml:mrow><mml:mtext>total</mml:mtext><mml:mtext> </mml:mtext><mml:mtext>no</mml:mtext><mml:mo>.</mml:mo><mml:mtext> </mml:mtext><mml:mtext>of true samples</mml:mtext></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mn>67.1</mml:mn><mml:mo>%</mml:mo><mml:mo>.</mml:mo><mml:mspace linebreak="goodbreak"/><mml:malignmark/></mml:mrow></mml:math><label>(21)</label></disp-formula></p><p>We observe that the mean fidelity index produces a lower percentage of failure at the same value of <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi></mml:math></inline-formula> (compared to the index of individual NOs) and, therefore, can be used to make predictions of correct phases with greater confidence. For example, the percentage of failed true predictions for the mean fidelity index at <inline-formula><mml:math display="inline"><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:math></inline-formula> is only 1.42%, compared to 4.08% of the individual fidelity indices. The fraction of correct true predictions is comparable in both cases meaning that the mean fidelity index continues to detect essentially the same number of true samples with higher confidence.</p><p><italic>Correlations between the unitarity loss and the fidelity index</italic>. As a further calibrating question we can ask whether the fidelity index correlates with the values of the unitarity loss. As an example, in Fig. 
<xref ref-type="fig" rid="f4">4</xref> we contrast the heat map of the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub></mml:math></inline-formula> unitarity loss and the heat map of the fidelity index for the predictions of one of the 56 trained NOs on the quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula>. There is visible correlation between the plots, and this is typical in many training runs both for linear and quadratic moduli. It is difficult, however, to make a precise quantitative statement about their relation.</p><fig id="f4"><object-id>4</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f4</object-id><label>FIG. 4.</label><caption><p>Predictions for quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula> by one of the 56 NOs trained on both true and false <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix phases. The heat map on the left presents the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>log</mml:mi><mml:mn>10</mml:mn></mml:msub></mml:math></inline-formula> unitarity loss of the predictions. The heat map on the right presents the value of the fidelity index <inline-formula><mml:math display="inline"><mml:mi mathvariant="script">F</mml:mi></mml:math></inline-formula>. The color bar scale for the latter focuses on values between 0.95 and 1. 
Values below 0.95 are depicted in deep blue and values above 1 are depicted in deep red. As in previous plots, we have included the curve at <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> (light black) and the curve of the dual bound (thick gray).</p></caption><graphic xlink:href="e045020_4.eps"/></fig><fig id="f5"><object-id>5</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f5</object-id><label>FIG. 5.</label><caption><p>The left heat map depicts the value of the mean fidelity index <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula> on the landscape of linear moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>a</mml:mi><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mi>b</mml:mi></mml:math></inline-formula>. The right heat map depicts the corresponding values of the mean fidelity index for quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>c</mml:mi><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mi>d</mml:mi></mml:math></inline-formula>. Notice that the color bar scale focuses on values between 0.95 and 1. We have also included the curve at <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> (light black) and the curve of the dual bound (thick gray).</p></caption><graphic xlink:href="e045020_5.eps"/></fig><fig id="f6"><object-id>6</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f6</object-id><label>FIG. 
6.</label><caption><p>The top left plot depicts the modulus of a random <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula> amplitude. In the top right plot we present the fidelity indices for each of the 56 trained NOs evaluated on this specific modulus. The two points with a red circle around them represent the two predictions with fidelity indices closest to 1 (the actual values being 0.957 and 0.962, respectively). In the bottom plot we present the corresponding <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> predictions of these two cases (in blue and orange) against the exact result represented by the thick gray curve.</p></caption><graphic xlink:href="e045020_6.eps"/></fig><p>We also notice a clean separation between the predictions with high fidelity index (above 0.99) and predictions with low fidelity index (below 0.95). This is an interesting feature that correlates well with the above-mentioned observations about the fidelity index and its success rate. Unlike the unitarity loss, which varies smoothly between true and false predictions, the fidelity index appears to provide a sharper acceptance/rejection criterion.</p><p>The results of the 56 trained NOs warrant some additional observations. First, we notice that the presence of the extra label that classifies the sample as true or false has affected the nature of the predictions across the landscape of input moduli. This is visible in the comparison of the unitarity losses in the top right heat map of Fig. <xref ref-type="fig" rid="f3">3</xref> against the heat map on the left of Fig. 
<xref ref-type="fig" rid="f4">4</xref>.</p><p>Second, the unitarity losses for the predictions of the 56 NOs that included the fidelity label are typically slightly higher than those for the predictions of the NOs in Sec. <xref ref-type="sec" rid="s4a2">IV A 2</xref>, which did not involve any training on false samples. This is expected, since we were training with 300,000 true samples in Sec. <xref ref-type="sec" rid="s4a2">IV A 2</xref>, whereas the training here involves a smaller number of true samples, 225,000.</p><p>Third, across the set of the 56 different NOs, we observed significant variation in the heat maps of the predicted phases and their fidelity index. This observation hints at the complexities of the training process in this context and makes it harder to extract invariant information from individual training runs. It is therefore interesting to explore whether we can obtain information independent of the fluctuations of individual training iterations, reflecting real properties of the system, by collecting statistics from multiple runs.</p><p><italic>Performance of the mean fidelity index</italic>. The above discussion is further motivation in favor of the use of the mean fidelity index <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula>, which involves an average over independent NOs. In what follows, we evaluate <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula> across the 56 previously trained NOs and plot the results in Fig. <xref ref-type="fig" rid="f5">5</xref> across the landscape of linear and quadratic moduli. We observed that as we incorporated more and more NOs into the mean, there was an apparent convergence to the heat maps of Fig. <xref ref-type="fig" rid="f5">5</xref>. 
In the process, random fluctuating patterns from individual runs disappeared.</p><p>For both linear and quadratic moduli we notice that the averaging over NOs preserves the sharp transition between high and low fidelity indices that was characteristic in individual runs. Additionally, we once again observe that the high fidelity index regions in red (with values above 0.99) match well with the expectations from the test dataset for high confidence true predictions based on the mean fidelity index in this range.</p><p>For linear (quadratic) moduli, <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula> is plotted on the left (right) heat map of Fig. <xref ref-type="fig" rid="f5">5</xref>, together with the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> and dual bounds. We notice the characteristic concentration of high fidelity values around the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>&lt;</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> region, which indicates that in the vicinity of this region the NOs correctly recognize predictions with the expected qualitative features of valid solutions.</p><p>The heat map of <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula> in the quadratic scan exhibits some additional intriguing features that seem to fit well with features of the heat map derived in Fig. 5 of Ref. <xref ref-type="bibr" rid="c12">[12]</xref> using PINNs. 
The high fidelity red region in our plot stretches above the upper <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> boundary in a manner that seems to correlate with a region of relatively low-loss solutions [at the orders of <inline-formula><mml:math display="inline"><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>4.5</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mi>–</mml:mi><mml:mi>O</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>4</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>] detected by PINNs.<fn id="fn9"><label><sup>9</sup></label><p>In the heat map of Fig. 5 in <xref ref-type="bibr" rid="c12">[12]</xref> the unitarity losses inside the <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula> contour are of the order of <inline-formula><mml:math display="inline"><mml:msup><mml:mn>10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn>8</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. 
Here, we are referring to the losses of the PINN solutions right above that region and below the dual bound.</p></fn> In addition, our heat map has a characteristic upward tail that trails closely the dual bound in the vicinity of two points: one at <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mn>0.65</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and another at <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>≃</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>2.4</mml:mn><mml:mo>,</mml:mo><mml:mn>0.9</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. The first point is tantalizingly close to the values <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:msqrt><mml:mfrac><mml:mn>3</mml:mn><mml:mn>8</mml:mn></mml:mfrac></mml:msqrt><mml:mo stretchy="false">(</mml:mo><mml:mn>5</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>∼</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>3.06</mml:mn><mml:mo>,</mml:mo><mml:mn>0.61</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> of one of the finite partial wave solutions with <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> that the PINNs detect. 
The second point is similarly close to the values <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>5</mml:mn><mml:mn>4</mml:mn></mml:mfrac><mml:msqrt><mml:mfrac><mml:mn>3</mml:mn><mml:mn>7</mml:mn></mml:mfrac></mml:msqrt><mml:mo stretchy="false">(</mml:mo><mml:mn>3</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>∼</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>2.45</mml:mn><mml:mo>,</mml:mo><mml:mn>0.82</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> of the second finite partial wave solution with <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> that the PINNs detect. Near the second point, our red region violates slightly the dual bound and so does a similar yellow blob in Fig. <xref ref-type="fig" rid="f5">5</xref> from Ref. <xref ref-type="bibr" rid="c12">[12]</xref>. Interestingly, the NO of Sec. <xref ref-type="sec" rid="s4a2">IV A 2</xref>, without the mean fidelity index, was unable to detect these features in the quadratic heat map of Fig. <xref ref-type="fig" rid="f3">3</xref>, but the NOs with the mean fidelity index seem to have picked them up. That is another indication that <inline-formula><mml:math display="inline"><mml:mover accent="true"><mml:mi mathvariant="script">F</mml:mi><mml:mo stretchy="false">¯</mml:mo></mml:mover></mml:math></inline-formula> is a promising measure for the detection of real features learned by the NOs.</p><p><italic>Detecting false predictions</italic>. We provided evidence that a high fidelity index (in the interval [0.99, 1.01]) can confidently assess that the prediction is a valid solution. 
Outside this interval the validity of the prediction is less clear, but it is natural to expect that a low fidelity index will be associated more frequently with a false prediction. We would next like to examine more closely how the fidelity index behaves in situations where the NO fails. For that purpose, we return to the finite partial wave amplitudes with <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula>, which proved to be a challenge for the NOs of Sec. <xref ref-type="sec" rid="s4a2">IV A 2</xref>.</p><p>In Fig. <xref ref-type="fig" rid="f6">6</xref> we plot the fidelity index and some of the actual predictions from the 56 NOs trained on a combination of true and false samples. The NOs have been evaluated on the modulus of a randomly chosen <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>4</mml:mn></mml:math></inline-formula> amplitude, which is depicted on the top left plot of Fig. <xref ref-type="fig" rid="f6">6</xref>. On the top right plot, we present the values of the fidelity index for each of the trained NOs. The vast majority of the NOs exhibit a low index with mean value 0.499; this is consistent with the fact that the NOs fail to correctly reproduce the corresponding phase, as one can explicitly check by plotting the predicted output for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> against the exact result. This is useful: the NOs fail to generalize in this case, but they recognize correctly that this is the case and provide a clear indication of that information in the output.</p><p>Looking closer at the fidelity indices for each of the 56 NOs in the top right plot of Fig. 
<xref ref-type="fig" rid="f6">6</xref>, we also notice that out of the 56 NOs only two have a fidelity index within the interval [0.95, 1]. They are denoted with a red circle in the top right plot of Fig. <xref ref-type="fig" rid="f6">6</xref>. These two points correspond to fidelity indices 0.957 and 0.962. According to the previous discussion, they lie outside the region that captures a confident true prediction. The predicted <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> for these NOs are presented at the bottom plot of Fig. <xref ref-type="fig" rid="f6">6</xref> against the exact result, denoted by the thick gray curve, in blue and orange, respectively. The orange prediction, which has the higher fidelity index 0.962, is clearly better and qualitatively closer to an acceptable solution. It is a smoother function within the interval <inline-formula><mml:math display="inline"><mml:mo stretchy="false">[</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula> (as one would expect from the sine of a real function), in contrast to the blue prediction that is chaotic and outside the interval <inline-formula><mml:math display="inline"><mml:mo stretchy="false">[</mml:mo><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">]</mml:mo></mml:math></inline-formula>. The NO has recognized the importance of these features and has assigned a higher fidelity index to the orange prediction. In principle, it is impossible to exclude the orange prediction as false, but the fact that it stands as a clear outlier in the statistics of Fig. 
<xref ref-type="fig" rid="f6">6</xref>, and that the mean fidelity index is very low with small deviation, suggest that the orange prediction is likely false.</p><p>In conclusion, the above observations support using NOs within a statistical framework. In general situations, we propose the following approach: When the mean fidelity index suggests that a prediction should be rejected (as in Fig. <xref ref-type="fig" rid="f6">6</xref>), it should be discarded as potentially false. When the mean fidelity index is high (within the interval [0.99, 1.01]), one should accept the prediction as correct with high probability and extract predictions using the pointwise average of the predicted functions across the collection of NOs. Useful information can also be extracted by the pointwise standard deviation of the predicted functions.<fn id="fn10"><label><sup>10</sup></label><p>A similar statistical approach was also advocated in the optimization schemes of Refs. <xref ref-type="bibr" rid="c38 c39">[38,39]</xref>. In that context, the average of independent stochastic optimization runs (especially, those based on reinforcement learning) was always observed to provide better approximations.</p></fn></p></sec></sec></sec><sec id="s5"><label>V.</label><title>AMBIGUOUS PHASES</title><p>The problem of <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix phases is interesting for an additional reason. So far we have operated under the assumption that for a given modulus <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> there is a unique solution for the phase <inline-formula><mml:math display="inline"><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> (up to trivial ambiguities) or that there is no solution at all. 
As we briefly reviewed in Sec. <xref ref-type="sec" rid="s2">II</xref>, there are also cases of finely tuned moduli that admit a doubly ambiguous phase. Such cases were studied by several papers in the 1960s and 1970s and still lack a general complete classification. More recently, Ref. <xref ref-type="bibr" rid="c12">[12]</xref> revisited the construction of such solutions using the PINN approach. In this section, we would like to explore if we can detect the ambiguous solutions of infinite partial wave amplitudes by training NOs on unique and ambiguous solutions of finite partial wave amplitudes. For the training we are going to use the fully classified ambiguous amplitudes with <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> <xref ref-type="bibr" rid="c23 c24 c25">[23–25]</xref>. Clearly, this task will be much more subtle and demanding, compared to the generic configurations we have been discussing so far.</p><sec id="s5a"><label>A.</label><title>Brief note on <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mtext> </mml:mtext><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> amplitudes with phase ambiguities</title><p>To generate training samples for <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> amplitudes with ambiguous phases, we used the classification developed in <xref ref-type="bibr" rid="c25">[25]</xref>. 
Here, we briefly review the relevant construction and note some minor discrepancies in the original paper <xref ref-type="bibr" rid="c25">[25]</xref>.</p><p>The approach of <xref ref-type="bibr" rid="c25">[25]</xref> involves an alternative decomposition of the partial wave amplitude in terms of the forward scattering amplitude (at <inline-formula><mml:math display="inline"><mml:mi>θ</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:math></inline-formula>) as <disp-formula id="d22"><mml:math display="block"><mml:mrow><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:munderover><mml:mrow><mml:mo>∏</mml:mo></mml:mrow><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>L</mml:mi></mml:mrow></mml:munderover><mml:mfrac><mml:mrow><mml:mi>z</mml:mi><mml:mo stretchy="false">-</mml:mo><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mn>1</mml:mn><mml:mo stretchy="false">-</mml:mo><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(22)</label></disp-formula>In this representation, all possible amplitudes with the same modulus at fixed <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi></mml:math></inline-formula> can be obtained by acting on <xref ref-type="disp-formula" rid="d22">(22)</xref> with the transformations <xref ref-type="bibr" rid="c40">[40]</xref> <disp-formula id="d23"><mml:math display="block"><mml:mrow><mml:mi>S</mml:mi><mml:mtext> </mml:mtext><mml:mo>:</mml:mo><mml:mtext> </mml:mtext><mml:mi>Re</mml:mi><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo id="d23a1" 
stretchy="false">→</mml:mo><mml:mo>-</mml:mo><mml:mi>Re</mml:mi><mml:mi>F</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo><mml:mo>,</mml:mo><mml:mspace linebreak="newline"/><mml:msub><mml:mrow><mml:mi>T</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mtext> </mml:mtext><mml:mo>:</mml:mo><mml:mtext> </mml:mtext><mml:mtext> </mml:mtext><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo indentalign="id" indenttarget="d23a1" stretchy="false">→</mml:mo><mml:msubsup><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mi>l</mml:mi></mml:mrow><mml:mrow><mml:mo>*</mml:mo></mml:mrow></mml:msubsup><mml:mo>.</mml:mo></mml:mrow></mml:math><label>(23)</label></disp-formula>Combinations of the above symmetries are also allowed as long as they do not lead to phases that are trivially related by sending <inline-formula><mml:math display="inline"><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">→</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:math></inline-formula>. Defining the variables <inline-formula><mml:math display="inline"><mml:msub><mml:mi>ζ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo>≔</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mn>2</mml:mn><mml:mi>i</mml:mi><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:mrow></mml:msup></mml:math></inline-formula>, it is straightforward to equate <xref ref-type="disp-formula" rid="d11">(11)</xref> with <xref ref-type="disp-formula" rid="d22">(22)</xref> and solve for <inline-formula><mml:math display="inline"><mml:msub><mml:mi>ζ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:msub><mml:mi>W</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
One can then look for ambiguous solutions for the phase shifts <inline-formula><mml:math display="inline"><mml:msub><mml:mi>δ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub></mml:math></inline-formula> by requiring that (i) the <inline-formula><mml:math display="inline"><mml:mo stretchy="false">|</mml:mo><mml:msub><mml:mi>ζ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">|</mml:mo></mml:math></inline-formula> are left invariant by the transformations <xref ref-type="disp-formula" rid="d23">(23)</xref> and (ii) <inline-formula><mml:math display="inline"><mml:mo stretchy="false">|</mml:mo><mml:msub><mml:mi>ζ</mml:mi><mml:mo>ℓ</mml:mo></mml:msub><mml:mo stretchy="false">|</mml:mo><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:math></inline-formula>, which is equivalent to imposing that the scattering is elastic. For <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> this procedure leads to a real one-parameter family of twofold ambiguous phases (that are not trivially related) for specific intervals on the real line, as reported in Tables 1 and 2 of <xref ref-type="bibr" rid="c25">[25]</xref>.</p><p>More specifically, for <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> the only independent transformation that does not lead to trivially related ambiguous phases is <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi><mml:msub><mml:mi>T</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula>. 
Following the above steps, this recovers the real one-parameter family of twofold ambiguous solutions of <xref ref-type="bibr" rid="c24">[24]</xref>, including the Crichton ambiguity <xref ref-type="bibr" rid="c23">[23]</xref>.</p><p>For <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> the only independent transformations that do not lead to trivially related ambiguous phases are <inline-formula><mml:math display="inline"><mml:msub><mml:mi>T</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi><mml:msub><mml:mi>T</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula>. Analyzing the various possibilities leads to two classes of twofold ambiguous families of solutions arising for each of the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>T</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi><mml:msub><mml:mi>T</mml:mi><mml:mn>1</mml:mn></mml:msub></mml:math></inline-formula> transformations. In this context, we report the following disagreement with two of the expressions in <xref ref-type="bibr" rid="c25">[25]</xref>. 
We find <disp-formula id="d24"><mml:math display="block"><mml:mrow><mml:mi>cos</mml:mi><mml:mi>η</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mfrac><mml:mrow><mml:mfrac><mml:mrow><mml:mn>15</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>x</mml:mi></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>3</mml:mn></mml:mrow><mml:mrow><mml:mn>7</mml:mn></mml:mrow></mml:mfrac></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>135</mml:mn><mml:mo stretchy="false">|</mml:mo><mml:msup><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mo>′</mml:mo></mml:mrow></mml:msup><mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>8</mml:mn><mml:mi>x</mml:mi></mml:mrow><mml:mrow><mml:mn>15</mml:mn></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>45</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:mi>x</mml:mi></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mn>8</mml:mn></mml:mrow><mml:mrow><mml:mn>3</mml:mn><mml:mo stretchy="false">|</mml:mo><mml:msup><mml:mrow><mml:mi>H</mml:mi></mml:mrow><mml:mrow><mml:mo>′</mml:mo></mml:mrow></mml:msup><mml:mo stretchy="false">|</mml:mo><mml:mo stretchy="false">(</mml:mo><mml:mn>4</mml:mn><mml:mi>x</mml:mi><mml:mo>+</mml:mo><mml:mn>30</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:mrow></mml:math><label>(24)</label></disp-formula><disp-formula id="d25"><mml:math display="block"><mml:mrow><mml:mo 
stretchy="false">|</mml:mo><mml:msub><mml:mrow><mml:mi>W</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mn>252</mml:mn><mml:msubsup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mrow><mml:mrow><mml:mn>5</mml:mn><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>7</mml:mn></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>7</mml:mn><mml:msubsup><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup><mml:mo stretchy="false">+</mml:mo><mml:mn>9</mml:mn><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">|</mml:mo><mml:mi>A</mml:mi><mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>-</mml:mo><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">+</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mn>5</mml:mn><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mo stretchy="false">(</mml:mo><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">+</mml:mo><mml:mn>1</mml:mn><mml:mo 
stretchy="false">)</mml:mo><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>5</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>c</mml:mi></mml:mrow><mml:mrow><mml:mn>7</mml:mn></mml:mrow></mml:msub><mml:mo stretchy="false">|</mml:mo><mml:mi>A</mml:mi><mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mfrac></mml:mrow></mml:math><label>(25)</label></disp-formula>for (A.5) and (A.14) in <xref ref-type="bibr" rid="c25">[25]</xref>, respectively.<fn id="fn11"><label><sup>11</sup></label><p>Note that in <xref ref-type="bibr" rid="c25">[25]</xref> what we call <inline-formula><mml:math display="inline"><mml:msub><mml:mi>W</mml:mi><mml:mi>l</mml:mi></mml:msub></mml:math></inline-formula> is denoted as <inline-formula><mml:math display="inline"><mml:msub><mml:mi>F</mml:mi><mml:mi>l</mml:mi></mml:msub></mml:math></inline-formula>.</p></fn> We are in agreement, however, with all other formulas, as well as the conclusions of the analysis of <xref ref-type="bibr" rid="c25">[25]</xref> as presented in their Tables 1 and 2.</p></sec><sec id="s5b"><label>B.</label><title>Neural operators on the double cover</title><p>In order to incorporate the possibility of amplitudes that have the same modulus and two inequivalent phases, we set up a 1D TFNO that takes a single input <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>, but outputs two <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. 
On the <inline-formula><mml:math display="inline"><mml:mi>z</mml:mi></mml:math></inline-formula> grid with 100 collocation points this implies that the output is a 200-dimensional vector, which concatenates the 100-dimensional vectors of the two predictions. When the prediction is unique, the concatenated vectors are identical. We will report results without a fidelity index, but that is a feature that can be readily incorporated in this discussion.</p><sec id="s5b1"><label>1.</label><title>Hyperparameters and training</title><p><italic>Hyperparameters</italic>. Following a simple grid search, we observed a significantly larger dependence of the results on the NO hyperparameters for this problem. In what follows, we will report results based on NOs with essentially the same hyperparameters as in Sec. <xref ref-type="sec" rid="s4a1">IV A 1</xref>. The only hyperparameters that differ are the number of projection channels (we chose 256 instead of 512) and the number of layers (we chose 6 instead of 4). The resulting model has 72,745 parameters.</p><p><italic>Training</italic>. We attempted training with several types of datasets involving different ratios of unique and ambiguous solutions. Since we were limited to a relatively small range of amplitudes with ambiguous phases at <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula>, we could not significantly increase the total number of samples, which in turn made the training less efficient. 
The results presented below are based on a dataset with a total number of 100,000 randomly chosen samples and the following split: 30,000 random <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula> amplitudes assumed to be unique, as well as 10,000 <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mrow></mml:math></inline-formula> and 60,000 <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula> amplitudes with ambiguous phases sampled randomly across the different families of solutions summarized in the previous subsection. We trained on 99,000 of these samples and reserved 1000 samples for testing.</p><p>We present the results of two, independently trained, NOs with the same hyperparameters, which were trained for 6500 epochs.</p></sec><sec id="s5b2"><label>2.</label><title>Tests and observations</title><p>Once again, the NOs test well within the training-test dataset. Our purpose here is to explore whether they can achieve any sensible generalization outside their immediate training domain. We will not attempt an exhaustive analysis, opting instead for the study of a few examples for illustration purposes. Specifically, we will focus on the performance of (a) the NOs on the linear and quadratic moduli of Fig. 
<xref ref-type="fig" rid="f2">2</xref> [<inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>10</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mn>4</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>] that have an infinite partial wave expansion and no phase ambiguities and (b) one of the solutions with phase ambiguities in Ref. <xref ref-type="bibr" rid="c41">[41]</xref>—with parameter <inline-formula><mml:math display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>6</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>3</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mi>i</mml:mi></mml:math></inline-formula>—that was also discussed in Ref. <xref ref-type="bibr" rid="c12">[12]</xref>; see e.g. Fig. 12 of that paper.</p><p>In Fig. <xref ref-type="fig" rid="f7">7</xref> we present the predictions of the two NOs for the linear and quadratic moduli. In both cases, the two predicted phases are close to each other and close to the unique exact phase, but the accuracy of the results is obviously lower compared to the results of the previous sections. This is reasonable, since we only trained with 30,000 unique samples (compared to 300,000 unique samples in Sec. 
<xref ref-type="sec" rid="s4a2">IV A 2</xref>).</p><fig id="f7"><object-id>7</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f7</object-id><label>FIG. 7.</label><caption><p>The left column displays the exact <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> (gray curve) and the two predictions of the first NO (blue and orange) for the linear and quadratic moduli <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>10</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo>+</mml:mo><mml:mn>4</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and <inline-formula><mml:math display="inline"><mml:mi>B</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mn>2</mml:mn></mml:mfrac><mml:mo stretchy="false">(</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:msup><mml:mo>+</mml:mo><mml:mn>1</mml:mn><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula>. The right column displays the corresponding quantities for the second NO. Both NOs were trained on the same dataset and with the same hyperparameters.</p></caption><graphic xlink:href="e045020_7.eps"/></fig><p>In Fig. 
<xref ref-type="fig" rid="f8">8</xref> we display the corresponding predictions of the two NOs for the Atkinson <italic>et al.</italic> <xref ref-type="bibr" rid="c41">[41]</xref> <inline-formula><mml:math display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>6</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>3</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mi>i</mml:mi></mml:math></inline-formula> modulus. The first NO detects both phases, but the second detects only one of them. More generally, over several runs we observed that properly trained NOs would see either one or both solutions. More frequently, they would detect only one solution (the same one that the second NO detects in Fig. <xref ref-type="fig" rid="f8">8</xref>).</p><fig id="f8"><object-id>8</object-id><object-id pub-id-type="doi">10.1103/PhysRevD.110.045020.f8</object-id><label>FIG. 8.</label><caption><p>The left plot displays the two predictions of the first NO (blue and orange) against the solutions for <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mrow><mml:mi>ϕ</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:math></inline-formula> for the Atkinson <italic>et al.</italic> modulus with <inline-formula><mml:math display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mn>6</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mn>3</mml:mn><mml:mn>5</mml:mn></mml:mfrac><mml:mi>i</mml:mi></mml:math></inline-formula>. The right plot displays the predictions of the second NO. The first NO detected both solutions, while the second NO only one. 
Both NOs were trained on the same dataset, with the same hyperparameters and for the same number of epochs.</p></caption><graphic xlink:href="e045020_8.eps"/></fig><p>We also tested the above NOs on the <inline-formula><mml:math display="inline"><mml:msub><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn>0.31</mml:mn><mml:mo>+</mml:mo><mml:mn>0.95</mml:mn><mml:mi>i</mml:mi></mml:math></inline-formula> ambiguous amplitude of Ref. <xref ref-type="bibr" rid="c12">[12]</xref> that has <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>μ</mml:mi><mml:mo>≃</mml:mo><mml:mn>1.67</mml:mn></mml:math></inline-formula>; see Fig. 15 in that paper. The NOs predicted a unique output partially approximating one of the solutions of Ref. <xref ref-type="bibr" rid="c12">[12]</xref> with low accuracy. We observed that the prediction was more sensitive (compared to other inputs) to the precise numerics of the input modulus. This is an expected difficulty in general, as it involves generalization to measure-zero configurations and we have no dynamical way to tune the input modulus.</p><p>It would be interesting to explore if these problems can be addressed in the following manner: Still within the setting of not invoking the unitarity equation, one could first train a NO (or an ensemble of NOs) to produce a (mean) prediction of two <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:math></inline-formula> and a (mean) fidelity index. 
Then, in search of ambiguous solutions within the class where the NOs can generalize, one could run a PINN with a NN that models the modulus <inline-formula><mml:math display="inline"><mml:msub><mml:mi>B</mml:mi><mml:mi>θ</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> (with <inline-formula><mml:math display="inline"><mml:mi>θ</mml:mi></mml:math></inline-formula> the NN parameters) and a loss function that has two contributions: (i) a repulsive potential for the two <inline-formula><mml:math display="inline"><mml:mi>sin</mml:mi><mml:mi>ϕ</mml:mi></mml:math></inline-formula> and (ii) a potential involving the (mean) fidelity index, e.g. of the form <inline-formula><mml:math display="inline"><mml:mo stretchy="false">(</mml:mo><mml:mi mathvariant="script">F</mml:mi><mml:mo>-</mml:mo><mml:mn>1</mml:mn><mml:msup><mml:mo stretchy="false">)</mml:mo><mml:mn>2</mml:mn></mml:msup></mml:math></inline-formula>. Both contributions are functionals of <inline-formula><mml:math display="inline"><mml:msub><mml:mi>B</mml:mi><mml:mi>θ</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>z</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:math></inline-formula> and the idea would be to optimize the PINN parameters <inline-formula><mml:math display="inline"><mml:mi>θ</mml:mi></mml:math></inline-formula> so that it produces moduli with two inequivalent phases and a high fidelity index close to 1.</p></sec></sec></sec><sec id="s6"><label>VI.</label><title>CONCLUSIONS AND OUTLOOK</title><p>In this paper we used Fourier neural operators to study properties of amplitudes in elastic <inline-formula><mml:math display="inline"><mml:mn>2</mml:mn><mml:mo stretchy="false">→</mml:mo><mml:mn>2</mml:mn></mml:math></inline-formula> scattering processes. 
Unlike previous approaches, we did not invoke the unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref> to relate the modulus and phase, but tried to extract information about this relation from supervised training on random amplitudes with a finite partial wave expansion and <inline-formula><mml:math display="inline"><mml:mrow><mml:mi>L</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn><mml:mo>,</mml:mo><mml:mn>2</mml:mn><mml:mo>,</mml:mo><mml:mn>3</mml:mn></mml:mrow></mml:math></inline-formula>. We observed that NOs can generalize nontrivially outside this class, successfully recovering (after a single training process) the heat maps of <xref ref-type="bibr" rid="c12">[12]</xref> for arbitrary linear and quadratic amplitude moduli. A similar approach was also applied to the twofold ambiguous phase solutions. Even though this case is generically much harder, as it concerns subtle properties of finely tuned configurations, it was nevertheless possible to demonstrate in specific examples that the NOs can generalize to recover two inequivalent phases for amplitudes with infinite partial wave expansions.</p><p>The question of how NOs generalize is not only central to this paper but also to the broader field of artificial intelligence (AI). The answer can depend on many factors, which are usually hard to identify: the nature of the training dataset, the choice of hyperparameters and the details of the training, to name but a few. In the main text, we observed that within our specific setup the NO could learn several—but not all—nontrivial properties of the underlying general structure. For example, it could generalize to a class of amplitudes of infinite partial wave expansions, but failed on amplitudes with finite partial wave expansions for <inline-formula><mml:math display="inline"><mml:mi>L</mml:mi><mml:mo>&gt;</mml:mo><mml:mn>3</mml:mn></mml:math></inline-formula>. 
Moreover, by simply training on true modulus-phase pairs, the NO could not detect the cases where a modulus is inadmissible. For that reason, it was crucial to train on both true and false samples, which were distinguished by an extra classifying label that we called the fidelity index. It was clear from several examples that this index could extract useful information about properties of scattering amplitudes, hidden inside the (inaccessible) unitarity equation <xref ref-type="disp-formula" rid="d5">(5)</xref>. We emphasized the importance of averaging over independent NOs and provided evidence that it can be used to increase the confidence of the predictions and reduce optimization noise during training, enabling us to isolate true system information. In particular, the mean fidelity index made the predictions more robust and allowed the NO to rate its own performance.</p><p>We are excited by the potential use of similar approaches in other—possibly harder—problems, where the underlying structure is obscure, i.e. it is impossible to directly solve a system of equations or to directly compute relevant quantities. For instance, it would be interesting to explore whether objects similar to the fidelity index can be defined (using NOs or other machine learning algorithms, especially generative AI algorithms) for other systems. In addition, the examples presented in this paper seem to indicate that by studying the statistics of learners for the same training dataset and hyperparameters, one can distill information about what this particular class of algorithms can—and cannot—learn without recourse to the unknown microscopics, hence providing a new road toward structures we do not yet understand.</p></sec></body><back><ack><title>ACKNOWLEDGMENTS</title><p>The work of V. N. was partially supported by the H.F.R.I. 
call “Basic research Financing (Horizontal support of all Sciences)” under the National Recovery and Resilience Plan “Greece 2.0” funded by the European Union—NextGenerationEU (H.F.R.I. Project No. 15384). The work of C. P. was partially supported by the Science and Technology Facilities Council (STFC) Consolidated Grants No. ST/T000686/1 and No. ST/X00063X/1 “Amplitudes, Strings &amp; Duality.” Calculations were performed using the Sulis tier 2 HPC platform hosted by the Scientific Computing Research Technology Platform at the University of Warwick. Sulis is funded by EPSRC Grant No. EP/T022108/1 and the HPC Midlands+ consortium.</p></ack><ref-list><ref id="c1"><label>[1]</label><mixed-citation publication-type="journal"><object-id>1</object-id><person-group person-group-type="author"><string-name>J. M. Maldacena</string-name></person-group>, <article-title>The large N limit of superconformal field theories and supergravity</article-title>, <source>Adv. Theor. Math. Phys.</source> <volume>2</volume>, <page-range>231</page-range> (<year>1998</year>).<issn>1095-0761</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.4310/ATMP.1998.v2.n2.a1</pub-id></mixed-citation></ref><ref id="c2"><label>[2]</label><mixed-citation publication-type="journal"><object-id>2</object-id><person-group person-group-type="author"><string-name>D. Poland</string-name>, <string-name>S. Rychkov</string-name>, and <string-name>A. Vichi</string-name></person-group>, <article-title>The conformal bootstrap: Theory, numerical techniques, and applications</article-title>, <source>Rev. Mod. 
Phys.</source> <volume>91</volume>, <page-range>015002</page-range> (<year>2019</year>).<pub-id pub-id-type="coden">RMPHAT</pub-id><issn>0034-6861</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/RevModPhys.91.015002</pub-id></mixed-citation></ref><ref id="c3"><label>[3]</label><mixed-citation publication-type="eprint"><object-id>3</object-id><person-group person-group-type="author"><string-name>S. Rychkov</string-name> and <string-name>N. Su</string-name></person-group>, <article-title>New developments in the numerical conformal bootstrap</article-title>, <pub-id pub-id-type="arxiv">arXiv:2311.15844</pub-id>.</mixed-citation></ref><ref id="c4"><label>[4]</label><mixed-citation publication-type="eprint"><object-id>4</object-id><person-group person-group-type="author"><string-name>M. Kruczenski</string-name>, <string-name>J. Penedones</string-name>, and <string-name>B. C. van Rees</string-name></person-group>, <article-title>Snowmass white paper: <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix bootstrap</article-title>, <pub-id pub-id-type="arxiv">arXiv:2203.02421</pub-id>.</mixed-citation></ref><ref id="c5"><label>[5]</label><mixed-citation publication-type="journal"><object-id>5</object-id><person-group person-group-type="author"><string-name>N. Kovachki</string-name>, <string-name>Z. Li</string-name>, <string-name>B. Liu</string-name>, <string-name>K. Azizzadenesheli</string-name>, <string-name>K. Bhattacharya</string-name>, <string-name>A. Stuart</string-name>, and <string-name>A. Anandkumar</string-name></person-group>, <article-title>Neural operator: Learning maps between function spaces</article-title>, <source>J. Mach. Learn. 
Res.</source> <volume>24</volume>, <page-range>4061</page-range> (<year>2023</year>).<issn>1532-4435</issn></mixed-citation></ref><ref id="c6"><label>[6]</label><mixed-citation publication-type="eprint"><object-id>6</object-id><person-group person-group-type="author"><string-name>Z. Li</string-name>, <string-name>N. Kovachki</string-name>, <string-name>K. Azizzadenesheli</string-name>, <string-name>B. Liu</string-name>, <string-name>K. Bhattacharya</string-name>, <string-name>A. Stuart</string-name>, and <string-name>A. Anandkumar</string-name></person-group>, <article-title>Fourier neural operator for parametric partial differential equations</article-title>, <pub-id pub-id-type="arxiv">arXiv:2010.08895</pub-id>.</mixed-citation></ref><ref id="c7"><label>[7]</label><mixed-citation publication-type="proc"><object-id>7</object-id><person-group person-group-type="author"><string-name>W. Johnny</string-name>, <string-name>H. Brigido</string-name>, <string-name>M. Ladeira</string-name>, and <string-name>J. C. F. Souza</string-name></person-group>, <article-title>Fourier neural operator for image classification</article-title>, in <source>2022 17th Iberian Conference on Information Systems and Technologies (CISTI)</source> (<year>2022</year>), <pub-id pub-id-type="doi" specific-use="display" xlink:href="https://doi.org/10.23919/CISTI54924.2022.9820128">10.23919/CISTI54924.2022.9820128</pub-id>.</mixed-citation></ref><ref id="c8"><label>[8]</label><mixed-citation publication-type="journal"><object-id>8</object-id><person-group person-group-type="author"><string-name>J. Xi</string-name>, <string-name>O. K. Ersoy</string-name>, <string-name>M. Cong</string-name>, <string-name>C. Zhao</string-name>, <string-name>W. Qu</string-name>, and <string-name>T. 
Wu</string-name></person-group>, <article-title>Wide and deep Fourier neural network for hyperspectral remote sensing image classification</article-title>, <source>Remote Sens.</source> <volume>14</volume>, <page-range>2931</page-range> (<year>2022</year>).<pub-id pub-id-type="coden">RSEND3</pub-id><pub-id pub-id-type="doi" specific-use="suppress-display">10.3390/rs14122931</pub-id></mixed-citation></ref><ref id="c9"><label>[9]</label><mixed-citation publication-type="proc"><object-id>9</object-id><person-group person-group-type="author"><string-name>S. Kabri</string-name>, <string-name>T. Roith</string-name>, <string-name>D. Tenbrinck</string-name>, and <string-name>M. Burger</string-name></person-group>, <article-title>Resolution-invariant image classification based on Fourier neural operators</article-title>, in <source>International Conference on Scale Space and Variational Methods in Computer Vision</source> <series>Lecture Notes in Computer Science</series> (<publisher-name>Springer</publisher-name>, Cham, <year>2023</year>), p. <page-range>236</page-range>.</mixed-citation></ref><ref id="c10"><label>[10]</label><mixed-citation publication-type="journal"><object-id>10</object-id><person-group person-group-type="author"><string-name>A. Kashefi</string-name> and <string-name>T. Mukerji</string-name></person-group>, <article-title>A novel Fourier neural operator framework for classification of multi-sized images: Application to three dimensional digital porous media</article-title>, <source>Phys. Fluids</source> <volume>36</volume>, <page-range>057131</page-range> (<year>2024</year>).<pub-id pub-id-type="coden">PHFLE6</pub-id><issn>1070-6631</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1063/5.0203977</pub-id></mixed-citation></ref><ref id="c11"><label>[11]</label><mixed-citation publication-type="journal"><object-id>11</object-id><person-group person-group-type="author"><string-name>M. Raissi</string-name>, <string-name>P. 
Perdikaris</string-name>, and <string-name>G. E. Karniadakis</string-name></person-group>, <article-title>Physics-informed neural networks: A deep learning framework for solving forward and inverse problems involving nonlinear partial differential equations</article-title>, <source>J. Comput. Phys.</source> <volume>378</volume>, <page-range>686</page-range> (<year>2019</year>).<pub-id pub-id-type="coden">JCTPAH</pub-id><issn>0021-9991</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1016/j.jcp.2018.10.045</pub-id></mixed-citation></ref><ref id="c12"><label>[12]</label><mixed-citation publication-type="journal"><object-id>12</object-id><person-group person-group-type="author"><string-name>A. Dersy</string-name>, <string-name>M. D. Schwartz</string-name>, and <string-name>A. Zhiboedov</string-name></person-group>, <article-title>Reconstructing <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix phases with machine learning</article-title>, <source>J. High Energy Phys.</source> <issue>05</issue> (<volume>2024</volume>) <page-range>200</page-range>.<pub-id pub-id-type="coden">JHEPFG</pub-id><issn>1029-8479</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/JHEP05(2024)200</pub-id></mixed-citation></ref><ref id="c13"><label>[13]</label><mixed-citation publication-type="book"><object-id>13</object-id><person-group person-group-type="author"><string-name>L. Landau</string-name> and <string-name>E. Lifshitz</string-name></person-group>, <source>Quantum Mechanics: Non-Relativistic Theory</source> (<publisher-name>Elsevier Science</publisher-name>, New York, <year>1981</year>).</mixed-citation></ref><ref id="c14"><label>[14]</label><mixed-citation publication-type="journal"><object-id>14</object-id><person-group person-group-type="author"><string-name>U. Buck</string-name></person-group>, <article-title>Inversion of molecular scattering data</article-title>, <source>Rev. Mod. 
Phys.</source> <volume>46</volume>, <page-range>369</page-range> (<year>1974</year>).<pub-id pub-id-type="coden">RMPHAT</pub-id><issn>0034-6861</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/RevModPhys.46.369</pub-id></mixed-citation></ref><ref id="c15"><label>[15]</label><mixed-citation publication-type="book"><object-id>15</object-id><person-group person-group-type="author"><string-name>A. Martin</string-name></person-group>, <source>Scattering Theory: Unitarity, Analyticity and Crossing</source> <series>Lecture Notes in Physics</series> (<publisher-name>Springer Berlin</publisher-name>, Heidelberg, <year>2007</year>).</mixed-citation></ref><ref id="c16"><label>[16]</label><mixed-citation publication-type="journal"><object-id>16</object-id><person-group person-group-type="author"><string-name>M. Correia</string-name>, <string-name>A. Sever</string-name>, and <string-name>A. Zhiboedov</string-name></person-group>, <article-title>An analytical toolkit for the <inline-formula><mml:math display="inline"><mml:mi>S</mml:mi></mml:math></inline-formula>-matrix bootstrap</article-title>, <source>J. High Energy Phys.</source> <issue>03</issue> (<volume>2021</volume>) <page-range>013</page-range>.<pub-id pub-id-type="coden">JHEPFG</pub-id><issn>1029-8479</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/JHEP03(2021)013</pub-id></mixed-citation></ref><ref id="c17"><label>[17]</label><mixed-citation publication-type="journal"><object-id>17</object-id><person-group person-group-type="author"><string-name>R. G. Newton</string-name></person-group>, <article-title>Determination of the amplitude from the differential cross section by unitarity</article-title>, <source>J. Math. Phys. 
(N.Y.)</source> <volume>9</volume>, <page-range>2050</page-range> (<year>1968</year>).<pub-id pub-id-type="coden">JMAPAQ</pub-id><issn>0022-2488</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1063/1.1664543</pub-id></mixed-citation></ref><ref id="c18"><label>[18]</label><mixed-citation publication-type="journal"><object-id>18</object-id><person-group person-group-type="author"><string-name>D. Atkinson</string-name></person-group>, <article-title>Introduction to the use of non-linear techniques in <inline-formula><mml:math display="inline"><mml:mi>s</mml:mi></mml:math></inline-formula>-matrix theory</article-title>, <source>Acta Phys. Aust. Suppl.</source> <volume>7</volume>, <page-range>32</page-range> (<year>1970</year>).<pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/978-3-7091-5835-7_2</pub-id></mixed-citation></ref><ref id="c19"><label>[19]</label><mixed-citation publication-type="journal"><object-id>19</object-id><person-group person-group-type="author"><string-name>A. Martin</string-name></person-group>, <article-title>Construction of the scattering amplitude from the differential cross-sections</article-title>, <source>Nuovo Cimento A</source> <volume>59</volume>, <page-range>131</page-range> (<year>1969</year>).<pub-id pub-id-type="coden">NCIAAT</pub-id><issn>0369-3546</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF02756351</pub-id></mixed-citation></ref><ref id="c20"><label>[20]</label><mixed-citation publication-type="journal"><object-id>20</object-id><person-group person-group-type="author"><string-name>A. D. Gangal</string-name> and <string-name>J. Kupsch</string-name></person-group>, <article-title>Determination of the scattering amplitude</article-title>, <source>Commun. Math. 
Phys.</source> <volume>93</volume>, <page-range>333</page-range> (<year>1984</year>).<pub-id pub-id-type="coden">CMPHAY</pub-id><issn>0010-3616</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF01258532</pub-id></mixed-citation></ref><ref id="c21"><label>[21]</label><mixed-citation publication-type="journal"><object-id>21</object-id><person-group person-group-type="author"><string-name>C. Itzykson</string-name> and <string-name>A. Martin</string-name></person-group>, <article-title>Phase-shift ambiguities for analytic amplitudes</article-title>, <source>Nuovo Cimento A</source> <volume>17</volume>, <page-range>245</page-range> (<year>1973</year>).<pub-id pub-id-type="coden">NCIAAT</pub-id><issn>0369-3546</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF02777935</pub-id></mixed-citation></ref><ref id="c22"><label>[22]</label><mixed-citation publication-type="journal"><object-id>22</object-id><person-group person-group-type="author"><string-name>A. Martin</string-name> and <string-name>J.-M. Richard</string-name></person-group>, <article-title>New result on phase shift analysis</article-title>, <source>Phys. Rev. D</source> <volume>101</volume>, <page-range>094014</page-range> (<year>2020</year>).<pub-id pub-id-type="coden">PRVDAQ</pub-id><issn>2470-0010</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/PhysRevD.101.094014</pub-id></mixed-citation></ref><ref id="c23"><label>[23]</label><mixed-citation publication-type="journal"><object-id>23</object-id><person-group person-group-type="author"><string-name>J. H. 
Crichton</string-name></person-group>, <article-title>Phase-shift ambiguities for spin-independent scattering</article-title>, <source>Il Nuovo Cimento A (1965–1970)</source> <volume>45</volume>, <page-range>256</page-range> (<year>1966</year>).<pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF02738098</pub-id></mixed-citation></ref><ref id="c24"><label>[24]</label><mixed-citation publication-type="journal"><object-id>24</object-id><person-group person-group-type="author"><string-name>D. Atkinson</string-name>, <string-name>P. W. Johnson</string-name>, <string-name>N. Mehta</string-name>, and <string-name>M. De Roo</string-name></person-group>, <article-title>Crichton’s phase-shift ambiguity</article-title>, <source>Nucl. Phys.</source> <volume>B55</volume>, <page-range>125</page-range> (<year>1973</year>).<pub-id pub-id-type="coden">NUPBBO</pub-id><issn>0550-3213</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1016/0550-3213(73)90413-6</pub-id></mixed-citation></ref><ref id="c25"><label>[25]</label><mixed-citation publication-type="journal"><object-id>25</object-id><person-group person-group-type="author"><string-name>F. A. Berends</string-name> and <string-name>S. N. M. Ruijsenaars</string-name></person-group>, <article-title>Examples of phase-shift ambiguities for spinless elastic scattering</article-title>, <source>Nucl. Phys.</source> <volume>B56</volume>, <page-range>507</page-range> (<year>1973</year>).<pub-id pub-id-type="coden">NUPBBO</pub-id><issn>0550-3213</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1016/0550-3213(73)90044-8</pub-id></mixed-citation></ref><ref id="c26"><label>[26]</label><mixed-citation publication-type="journal"><object-id>26</object-id><person-group person-group-type="author"><string-name>H. Cornille</string-name> and <string-name>J. M. 
Drouffe</string-name></person-group>, <article-title>Phase-shift ambiguities for spinless and <inline-formula><mml:math display="inline"><mml:mrow><mml:mn>4</mml:mn><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi mathvariant="normal">l</mml:mi><mml:mi>max</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> elastic scattering</article-title>, <source>Nuovo Cimento A</source> <volume>20</volume>, <page-range>401</page-range> (<year>1974</year>).<pub-id pub-id-type="coden">NCIAAT</pub-id><issn>0369-3546</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF02821973</pub-id></mixed-citation></ref><ref id="c27"><label>[27]</label><mixed-citation publication-type="journal"><object-id>27</object-id><person-group person-group-type="author"><string-name>G. V. Cybenko</string-name></person-group>, <article-title>Approximation by superpositions of a sigmoidal function</article-title>, <source>Math. Control Signals Syst.</source> <volume>2</volume>, <page-range>303</page-range> (<year>1989</year>).<pub-id pub-id-type="coden">MCSYE8</pub-id><issn>0932-4194</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/BF02551274</pub-id></mixed-citation></ref><ref id="c28"><label>[28]</label><mixed-citation publication-type="eprint"><object-id>28</object-id><person-group person-group-type="author"><string-name>D. P. Kingma</string-name> and <string-name>J. Ba</string-name></person-group>, <article-title>Adam: A method for stochastic optimization</article-title>, <pub-id pub-id-type="arxiv">arXiv:1412.6980</pub-id>.</mixed-citation></ref><ref id="c29"><label>[29]</label><mixed-citation publication-type="journal"><object-id>29</object-id><person-group person-group-type="author"><string-name>T. Chen</string-name> and <string-name>H. 
Chen</string-name></person-group>, <article-title>Approximations of continuous functionals by neural networks with application to dynamic systems</article-title>, <source>IEEE Trans. Neural Networks</source> <volume>4</volume>, <page-range>910</page-range> (<year>1993</year>).<pub-id pub-id-type="coden">ITNNEP</pub-id><issn>1045-9227</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1109/72.286886</pub-id></mixed-citation></ref><ref id="c30"><label>[30]</label><mixed-citation publication-type="journal"><object-id>30</object-id><person-group person-group-type="author"><string-name>I. E. Lagaris</string-name>, <string-name>A. Likas</string-name>, and <string-name>D. I. Fotiadis</string-name></person-group>, <article-title>Artificial neural networks for solving ordinary and partial differential equations</article-title>, <source>IEEE Trans. Neural Networks</source> <volume>9</volume>, <page-range>987</page-range> (<year>1998</year>).<pub-id pub-id-type="coden">ITNNEP</pub-id><issn>1045-9227</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1109/72.712178</pub-id></mixed-citation></ref><ref id="c31"><label>[31]</label><mixed-citation publication-type="journal"><object-id>31</object-id><person-group person-group-type="author"><string-name>T. Chen</string-name> and <string-name>H. Chen</string-name></person-group>, <article-title>Universal approximation to nonlinear operators by neural networks with arbitrary activation functions and its application to dynamical systems</article-title>, <source>IEEE Trans. Neural Networks</source> <volume>6</volume>, <page-range>911</page-range> (<year>1995</year>).<pub-id pub-id-type="coden">ITNNEP</pub-id><issn>1045-9227</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1109/72.392253</pub-id></mixed-citation></ref><ref id="c32"><label>[32]</label><mixed-citation publication-type="journal"><object-id>32</object-id><person-group person-group-type="author"><string-name>L. 
Lu</string-name>, <string-name>P. Jin</string-name>, <string-name>G. Pang</string-name>, <string-name>Z. Zhang</string-name>, and <string-name>G. E. Karniadakis</string-name></person-group>, <article-title>Learning nonlinear operators via DeepONet based on the universal approximation theorem of operators</article-title>, <source>Nat. Mach. Intell.</source> <volume>3</volume>, <page-range>218</page-range> (<year>2021</year>).<pub-id pub-id-type="doi" specific-use="suppress-display">10.1038/s42256-021-00302-5</pub-id></mixed-citation></ref><ref id="c33"><label>[33]</label><mixed-citation publication-type="journal"><object-id>33</object-id><person-group person-group-type="author"><string-name>S. Mizera</string-name></person-group>, <article-title>Scattering with neural operators</article-title>, <source>Phys. Rev. D</source> <volume>108</volume>, <page-range>L101701</page-range> (<year>2023</year>).<pub-id pub-id-type="coden">PRVDAQ</pub-id><issn>2470-0010</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/PhysRevD.108.L101701</pub-id></mixed-citation></ref><ref id="c34"><label>[34]</label><mixed-citation publication-type="eprint"><object-id>34</object-id><person-group person-group-type="author"><string-name>Z. Li</string-name>, <string-name>H. Zheng</string-name>, <string-name>N. Kovachki</string-name>, <string-name>D. Jin</string-name>, <string-name>H. Chen</string-name>, <string-name>B. Liu</string-name>, <string-name>K. Azizzadenesheli</string-name>, and <string-name>A. Anandkumar</string-name></person-group>, <article-title>Physics-informed neural operator for learning partial differential equations</article-title>, <pub-id pub-id-type="arxiv">arXiv:2111.03794</pub-id>.</mixed-citation></ref><ref id="c35"><label>[35]</label><mixed-citation publication-type="journal"><object-id>35</object-id><person-group person-group-type="author"><string-name>F. Bhat</string-name>, <string-name>D. Chowdhury</string-name>, <string-name>A. 
Sinha</string-name>, <string-name>S. Tiwari</string-name>, and <string-name>A. Zahed</string-name></person-group>, <article-title>Bootstrapping high-energy observables</article-title>, <source>J. High Energy Phys.</source> <issue>03</issue> (<volume>2024</volume>) <page-range>157</page-range>.<pub-id pub-id-type="coden">JHEPFG</pub-id><issn>1029-8479</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1007/JHEP03(2024)157</pub-id></mixed-citation></ref><ref id="c36"><label>[36]</label><mixed-citation publication-type="eprint"><object-id>36</object-id><person-group person-group-type="author"><string-name>D. P. Kingma</string-name> and <string-name>J. Ba</string-name></person-group>, <article-title>Adam: A method for stochastic optimization</article-title>, <pub-id pub-id-type="arxiv">arXiv:1412.6980</pub-id>.</mixed-citation></ref><ref id="c37"><label>[37]</label><mixed-citation publication-type="eprint"><object-id>37</object-id><person-group person-group-type="author"><string-name>A. Shocher</string-name>, <string-name>N. Cohen</string-name>, and <string-name>M. Irani</string-name></person-group>, <article-title>“Zero-shot” super-resolution using deep internal learning</article-title>, <pub-id pub-id-type="arxiv">arXiv:1712.06087</pub-id>.</mixed-citation></ref><ref id="c38"><label>[38]</label><mixed-citation publication-type="journal"><object-id>38</object-id><person-group person-group-type="author"><string-name>G. Kántor</string-name>, <string-name>V. Niarchos</string-name>, <string-name>C. Papageorgakis</string-name>, and <string-name>P. Richmond</string-name></person-group>, <article-title>6D (2,0) bootstrap with the soft-actor-critic algorithm</article-title>, <source>Phys. Rev. 
D</source> <volume>107</volume>, <page-range>025005</page-range> (<year>2023</year>).<pub-id pub-id-type="coden">PRVDAQ</pub-id><issn>2470-0010</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/PhysRevD.107.025005</pub-id></mixed-citation></ref><ref id="c39"><label>[39]</label><mixed-citation publication-type="journal"><object-id>39</object-id><person-group person-group-type="author"><string-name>V. Niarchos</string-name>, <string-name>C. Papageorgakis</string-name>, <string-name>P. Richmond</string-name>, <string-name>A. G. Stapleton</string-name>, and <string-name>M. Woolley</string-name></person-group>, <article-title>Bootstrability in line-defect CFTs with improved truncation methods</article-title>, <source>Phys. Rev. D</source> <volume>108</volume>, <page-range>105027</page-range> (<year>2023</year>).<pub-id pub-id-type="coden">PRVDAQ</pub-id><issn>2470-0010</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/PhysRevD.108.105027</pub-id></mixed-citation></ref><ref id="c40"><label>[40]</label><mixed-citation publication-type="journal"><object-id>40</object-id><person-group person-group-type="author"><string-name>A. Gersten</string-name></person-group>, <article-title>Ambiguities of complex phase-shift analysis</article-title>, <source>Nucl. Phys.</source> <volume>B12</volume>, <page-range>537</page-range> (<year>1969</year>).<pub-id pub-id-type="coden">NUPBBO</pub-id><issn>0550-3213</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1016/0550-3213(69)90072-8</pub-id></mixed-citation></ref><ref id="c41"><label>[41]</label><mixed-citation publication-type="journal"><object-id>41</object-id><person-group person-group-type="author"><string-name>D. Atkinson</string-name>, <string-name>L. P. Kok</string-name>, and <string-name>M. de Roo</string-name></person-group>, <article-title>Crichton ambiguities with infinitely many partial waves</article-title>, <source>Phys. Rev. 
D</source> <volume>17</volume>, <page-range>2492</page-range> (<year>1978</year>).<pub-id pub-id-type="coden">PRVDAQ</pub-id><issn>0556-2821</issn><pub-id pub-id-type="doi" specific-use="suppress-display">10.1103/PhysRevD.17.2492</pub-id></mixed-citation></ref></ref-list></back></article>
