diff --git a/example/simul_fit.py b/example/simul_fit.py index 81de11709..783f86a92 100644 --- a/example/simul_fit.py +++ b/example/simul_fit.py @@ -8,21 +8,51 @@ # latex data, same sample usans and sans # particles radius ~2300, uniform dispersity -datasets = load_data(str(data_path / '1d_data' / 'latex_smeared.xml'), index='all') +all_data = load_data(str(data_path / '1d_data' / 'latex_smeared.xml'), index='all') #[print(data) for data in datasets] +datasets = all_data # Both SANS and USANS +#datasets = all_data[0:1] # SANS +#datasets = all_data[1:2] # USANS -# A single sphere model to share between the datasets. We will use +# A single model to share between the datasets. We will use # FreeVariables below to set the parameters that are independent between # the datasets. -kernel = load_model('sphere') -pars = dict(scale=0.01, background=0.0, sld=5.0, sld_solvent=0.0, radius=1500., - #radius_pd=0.1, radius_pd_n=35, - ) +model_name = "ellipsoid" +if model_name == "sphere": + kernel = load_model("sphere") + pars = dict( + scale=0.01, background=0.0, sld=5.0, sld_solvent=0.0, radius=1500., + #radius_pd=0.1, radius_pd_n=35, + ) +elif model_name == "ellipsoid": + kernel = load_model("ellipsoid") + pars = dict( + scale=0.01, background=0.0, sld=5.0, sld_solvent=0.0, + radius_polar=1500., radius_equatorial=1500., + ) +elif model_name == "triaxial_ellipsoid": + kernel = load_model("triaxial_ellipsoid", dtype="double!") + pars = dict( + scale=0.01, background=0.0, sld=5.0, sld_solvent=0.0, + radius_polar=1500., radius_equat_major=1500., radius_equat_minor=1500., + ) +else: + raise ValueError("Use model_name in sphere, ellipsoid, triaxial_ellipsoid") + model = Model(kernel, **pars) # radius and polydispersity (if any) are shared -model.radius.range(0, 3000.) -#model.radius_pd.range(0, 1) +if model_name == "sphere": + model.radius.range(0, 3000.) + # model.radius_pd.range(0, 1) +elif model_name == "ellipsoid": + model.radius_equatorial.range(0, 3000.) + model.radius_polar.range(0, 3000.) +elif model_name == "triaxial_ellipsoid": + model.radius_equat_major.range(0, 3000.) + model.radius_equat_minor.range(0, 3000.) + model.radius_polar.range(0, 3000.) + # Contrast and dilution are the same for both measurements, but are not # separable with a single measurement (i.e., I(q) ~ F(q) contrast^2 Vf), diff --git a/explore/check_adaptive.py b/explore/check_adaptive.py new file mode 100644 index 000000000..6f1df3e4a --- /dev/null +++ b/explore/check_adaptive.py @@ -0,0 +1,408 @@ +from math import sqrt + +from sasmodels import core, data, generate +from sasmodels.compare import tic +from sasmodels.direct_model import DirectModel + +# == Unnested == + +MODELS = {} +KERNELS = {} +NESTED = {} +UNNESTED = {} +def register(loops): + def wrapper(par_fn): + model_name = par_fn.__name__ + if loops == 1: + UNNESTED[model_name] = par_fn + else: + NESTED[model_name] = par_fn + return par_fn + return wrapper + +@register(1) +def core_shell_bicelle(ab, c, shell=0.1, sld_core=0): + thick_rim = thick_face = ab*shell/2 if shell < 1 else shell + length = c - 2*thick_face + radius = ab/2 - thick_rim + pars = dict( + radius=radius, thick_rim=thick_rim, thick_face=thick_face, length=length, + sld_core=sld_core, sld_face=1, sld_rim=1, sld_solvent=0, + ) + return pars + +@register(1) +def core_shell_cylinder(ab, c, shell=0.1, sld_core=0): + thickness = ab*shell/2 if shell < 1 else shell + length = c - 2*thickness + radius = ab/2 - thickness + pars = dict( + radius=radius, thickness=thickness, length=length, + sld_core=sld_core, sld_shell=1, sld_solvent=0, + ) + return pars + +@register(1) +def core_shell_ellipsoid(ab, c, shell=0.1, sld_core=0): + thick_shell = ab*shell/2 if shell < 1 else shell + radius_equat_core = ab/2 - thick_shell + x_polar_shell = 1 + x_core = (c/2 - thick_shell)/(ab/2 - thick_shell) + pars = dict( + radius_equat_core=radius_equat_core, x_core=x_core, + thick_shell=thick_shell, x_polar_shell=x_polar_shell, + sld_core=sld_core, sld_shell=1, sld_solvent=0, + ) + return pars + +@register(1) +def cylinder(ab, c): + pars = dict(sld=1, sld_solvent=0, radius=ab/2, length=c) + return pars + +@register(1) +def ellipsoid(ab, c): + pars = dict(sld=1, sld_solvent=0, radius_equatorial=ab/2, radius_polar=c/2) + return pars + +@register(1) +def flexible_cylinder(ab, c, n=10): + pars = dict( + length=c*n, kuhn_length=c, + radius=ab, + sld=1, sld_solvent=0, + ) + return pars + +@register(1) +def hollow_cylinder(ab, c, shell=0.1): + thickness = ab/2*shell + pars = dict( + radius=ab/2 - thickness, + thickness=thickness, + length=c, + sld=1, sld_solvent=0, + ) + return pars + +@register(1) +def stacked_disks(ab, c, shell=0.1, n=10, sld_core=0): + # Each stack (layer+core+layer) has total thickness length/n + # The shell is a portion of that total thickness, with the core being (1-portion) + # Therefore thick_core = length/n*(1-shell) and thick_layer = length/n*shell/2 + jitter = 0.05 + pars = dict( + thick_core=c/n*(1-shell), + thick_layer=c/n*shell/2, + radius=ab/2, + n_stacking=n, + sigma_d=jitter*(c/n), + sld_core=sld_core, + sld_layer=1, + sld_solvent=0, + ) + return pars + + +# == Nested == + +@register(2) +def barbell(a, b, c): + radius, radius_bell = a/2, b/2 + h = sqrt(radius_bell**2 - radius**2) + # c/2 = length/2 + radius_bell + h + length = max(c - 2*(radius_bell + h), 0) # "disk" fails because radius_bell > c + pars = dict(radius=radius, radius_bell=radius_bell, length=length, sld=1, sld_solvent=0) + return pars + +@register(2) +def capped_cylinder(a, b, c): + radius, radius_cap = a/2, b/2 + h = sqrt(radius_cap**2 - radius**2) + # c/2 = length/2 + radius_bell + h + length = max(c - 2*(radius_cap - h), 0) + pars = dict(radius=radius, radius_cap=radius_cap, length=length, sld=1, sld_solvent=0) + return pars + +@register(2) +def core_shell_bicelle_elliptical_belt_rough(a, b, c, shell=0.1, sld_core=0, roughness=0.1): + thick_rim = thick_face = a*shell/2 if shell < 1 else shell + length = c - 2*thick_face + radius = a/2 - thick_rim + x_core = (b/2 - thick_rim) / (a/2 - thick_rim) + pars = dict( + radius=radius, x_core=x_core, thick_rim=thick_rim, thick_face=thick_face, length=length, + sld_core=sld_core, sld_face=1, sld_rim=1, sld_solvent=0, + sigma=thick_rim*roughness, + ) + return pars + +@register(2) +def core_shell_bicelle_elliptical(a, b, c, shell=0.1, sld_core=0): + thick_rim = thick_face = a*shell/2 if shell < 1 else shell + length = c - 2*thick_face + radius = a/2 - thick_rim + x_core = (b/2 - thick_rim) / (a/2 - thick_rim) + pars = dict( + radius=radius, x_core=x_core, thick_rim=thick_rim, thick_face=thick_face, length=length, + sld_core=sld_core, sld_face=1, sld_rim=1, sld_solvent=0, + ) + return pars + +@register(2) +def core_shell_parallelepiped(a, b, c, shell=0.1, sld_core=0): + thickness = a*shell/2 if shell < 1 else shell + length_a, length_b, length_c = a - 2*thickness, b - 2*thickness, c - 2*thickness + pars = dict( + sld_core=sld_core, sld_a=1, sld_b=1, sld_c=1, sld_solvent=0, + length_a=length_a, length_b=length_b, length_c=length_c, + thick_rim_a=thickness, thick_rim_b=thickness, thick_rim_c=thickness, + ) + return pars + +@register(2) +def elliptical_cylinder(a, b, c): + pars = dict(radius_minor=a/2, axis_ratio=b/c, length=c, sld=1, sld_solvent=0) + return pars + +@register(2) +def flexible_cylinder_elliptical(a, b, c, n=10): + pars = dict( + length=c*n, kuhn_length=c, + radius=a, axis_ratio=b/a, + sld=1, sld_solvent=0, + ) + return pars + +@register(2) +def hollow_rectangular_prism_thin_walls(a, b, c): + pars = dict( + sld=1, sld_solvent=0, + length_a=a, b2a_ratio=b/a, c2a_ratio=c/a, + ) + return pars + +@register(2) +def hollow_rectangular_prism(a, b, c, shell=0.1): + thickness = a/2*shell + pars = dict( + sld=1, sld_solvent=0, + length_a=a, b2a_ratio=b/a, c2a_ratio=c/a, + thickness=thickness, + ) + return pars + +@register(2) +def parallelepiped(a, b, c): + pars = dict( + sld=1, sld_solvent=0, + length_a=a, length_b=b, length_c=c, + ) + return pars + +@register(2) +def pringle(a, b, c): + # It is nonsensical to have thickness >> radius, so c ought not be the "long" direction, + # however, this is how the loops are structured in the pringle model, so go with it. + # Let a be the radius of the disk under curvature (alpha==beta), and b be the uncurved radius. + alpha = beta = max(abs(1 - a/b), 0.001) + pars = dict(radius=b/2, thickness=c, alpha=alpha, beta=beta, sld=1, sld_solvent=0) + return pars + +@register(2) +def rectangular_prism(a, b, c): + pars = dict(sld=1, sld_solvent=0, length_a=a, b2a_ratio=b/a, c2a_ratio=c/a) + return pars + +@register(2) +def triaxial_ellipsoid(a, b, c): + pars = dict( + sld=1, sld_solvent=0, + radius_equat_minor=a, radius_equat_major=b, radius_polar=c, + ) + return pars + + +def check(models=[]): + import numpy as np + + def longlines(fn): + def wrapped(*args, **kw): + with np.printoptions( + linewidth=1000, + formatter={'float': '{:.14e}'.format}, + ): + return fn(*args, **kw) + return wrapped + + def get_kernel(model_name, n_gauss=0, dtype="double", platform="dll"): + # print(f"{model_name=} {n_gauss=} {dtype=} {platform=}") + if model_name not in MODELS: + model_info = core.load_model_info(model_name) + MODELS[model_name] = model_info + # set_integration_size() below overwrites info. Cache the adaptive version immediately + KERNELS[(model_name, 0, dtype, platform)] = core.build_model(model_info, dtype=dtype, platform=platform) + if (model_name, n_gauss, dtype, platform) not in KERNELS: + model_info = MODELS[model_name] + if n_gauss: + # ocl adaptive... this had better be the first or second get_kernel since set_integration_size is stateful + generate.set_integration_size(model_info, n_gauss) + KERNELS[(model_name, n_gauss, dtype, platform)] = core.build_model(model_info, dtype=dtype, platform=platform) + return KERNELS[(model_name, n_gauss, dtype, platform)] + + def timer(fn): + # Clear startup overhead + toc = tic() + fn() + dt = toc() + if dt > 1: + return dt + # loop + toc = tic() + fn() + dt = toc() + n = int(0.5 // dt) + for _ in range(n): fn() + return toc()/(n+1) + + @longlines + def run_test(model_name, pars, speed_q, test_n_gauss, test_q, tol): + + # Parameter string suitable for use with "python -m sasmodels.compare model par=value..." + par_str = " ".join(f"{k}={v}" for k, v in pars.items()) + # print(f"{model_name} {par_str} -ngauss={test_n_gauss}") + k_adaptive = get_kernel(model_name, n_gauss=0, dtype="double", platform="dll") + speed_data = data.empty_data1D(speed_q) + test_data = data.empty_data1D(test_q) + + Iq_test_adaptive = DirectModel(test_data, k_adaptive)(**pars) # clear startup overhead + dt_adaptive = timer(lambda: DirectModel(speed_data, k_adaptive)(**pars)) + + labelled = False + def print_label(): + nonlocal labelled + if not labelled: + print(f"{model_name} {par_str}") + labelled = True + + if dt_adaptive > speed_target: + print_label() + print(f"! ** {model_name} is slow: {dt_adaptive:.1f} s for {len(speed_q)} points in [{speed_q.min()}, {speed_q.max()}]") + + if speed_check: # compare speed of adaptive with + if speed_check == "gpu": + k_speed = get_kernel(model_name, n_gauss=0, dtype="single", platform="ocl") + elif speed_check == "gauss-76": + k_speed = get_kernel(model_name, n_gauss=76) + else: + raise ValueError(f"Unknown speed check: {speed_check}. Use gpu or gauss-76") + dt_speed = timer(lambda: DirectModel(speed_data, k_speed)(**pars)) + + delta = dt_speed/dt_adaptive - 1 + if delta >= 1 or delta <= -0.5 or dt_adaptive > 0.5: + print_label() + value = ( + f"{dt_speed/dt_adaptive:.1f}x slower" if delta >= 1 + else f"{dt_adaptive/dt_speed:.1f}x faster" if delta < -0.5 + else f"{int(delta*100)}% slower" if delta > 0.0 + else f"{int(-delta*100)}% faster" + ) + if delta > 0.2: + comment = f" [{speed_check} {value}]" + (" ***" if speed_check != "gauss-76" else "") + elif delta < -0.2: + comment = f" [{speed_check} {value}]" + (" ***" if speed_check == "gauss-76" else "") + elif dt_adaptive > 0.2: + comment = " [slow]" + else: + comment = "" + print(f" cpu double: {dt_adaptive*1000:.1f} ms {speed_check}: {dt_speed*1000:.1f} ms{comment}") + + if speed_only: # speed_only test + return + + k_accurate = get_kernel(model_name, n_gauss=test_n_gauss, dtype="double", platform="dll") + Iq_test_accurate = DirectModel(test_data, k_accurate)(**pars) + relerr = abs(Iq_test_adaptive - Iq_test_accurate)/Iq_test_accurate + index = relerr > tol + if index.any(): + print_label() + print(" qvalue", test_q[index]) + print(" target", Iq_test_accurate[index]) + print(" actual", Iq_test_adaptive[index]) + print(" relerr", relerr[index]) + + # Compare against 76-point gaussian + k_76 = get_kernel(model_name, n_gauss=76, dtype="double", platform="dll") + Iq_test_76 = DirectModel(test_data, k_76)(**pars) + relerr_76 = abs(Iq_test_76 - Iq_test_accurate)/Iq_test_accurate + index = (relerr_76 > tol) #& (relerr > tol) & (relerr > 10*relerr_76) + if index.any(): + print_label() + #print("! ** gauss-76 is better than adaptive for {model_name} at some q values") + print("! ** gauss-76 is out of tolerance") + print(" qvalue", test_q[index]) + print(" relerr", relerr[index]) + print(" rel-76", relerr_76[index]) + + speed_target = 2 + #speed_only = True + #speed_check = None + speed_only = False + speed_check = "gauss-76" # None | "" | "gpu" | "gauss-76" + big_n = 5000 + #small_n = 1000 + small_n = big_n + n_per_decade = 1 + print(f"""\ +== Speed and accuracy tests for all adaptive integration models == +* target evaluation time is {speed_target} s (running on a mac M2 chip) +* q in [1e-5, 1] with 40 points per decade for 201 points total +* large models tested against {big_n} point gaussian integration +* q=[5e-4, 1e-3, 2e-3] with tol=1e-5 relative (measured q) +* q=[0.01, 0.1] with tol=0.2 relative (slit resolution limits) +* small models tested against {small_n} point gaussian integration +* q in [1e-3, 1] with {n_per_decade} points per decade +!!!! These tests run very slowly --- don't use as part of CI !!!! +""") + def loop(aspect, a, b, c): + q = np.logspace(-5, 0, 201) # 5 decades, 40 points per decade + big = max(a, b, c) > 10000 # 1 μm + if big: + n_gauss = big_n + test_q = np.array([0.0005, 0.001, 0.002, 0.01, 0.1]) + test_tol = np.array([5e-5, 5e-5, 5e-5, 2e-1, 2e-1]) + else: + n_gauss = small_n + test_q = np.logspace(-3, 0, 3*n_per_decade + 1) # 3 decades, n points per decade + test_tol = np.full_like(test_q, 1e-10) + + print(f"\n\n=== {'big' if big else 'small'} {aspect}: {a=} {b=} {c=} ===") + ab = max(a, b) + for name, fn in UNNESTED.items(): + # skip models not listed + if models and name not in models: + continue + pars = dict(background=0, **fn(ab=ab, c=c)) + run_test(name, pars, q, n_gauss, test_q, test_tol) + + ab = max(a, b) + for name, fn in NESTED.items(): + # skip models not listed + if models and name not in models: + continue + pars = dict(background=0, **fn(a=a, b=b, c=c)) + run_test(name, pars, q, n_gauss, test_q, test_tol) + + # small size (20 nm) tests + loop("rods", a=20, b=40, c=200) + loop("disks", a=180, b=200, c=40) + loop("cubes", a=200, b=200, c=200) + + # large size (20 μm) tests + loop("rods", a=1_000, b=2000, c=200_000) + loop("disks", a=180_000, b=200_000, c=1_000) + loop("cubes", a=200_000, b=200_000, c=200_000) + +if __name__ == "__main__": + import sys + check(models=sys.argv[1:]) diff --git a/explore/precision.py b/explore/precision.py index fdc009b78..05880f767 100755 --- a/explore/precision.py +++ b/explore/precision.py @@ -803,6 +803,21 @@ def np_2J1x_x(x): ocl_function=make_ocl("return sas_2J1x_x(q);", "sas_2J1x_x", ["lib/polevl.c", "lib/sas_J1.c"]), ) +add_function( + name="sin_arccos", + mp_function=lambda x: (mp.sin(mp.acos(x))), + np_function=lambda x: (np.sin(np.arccos(x))), + ocl_function=make_ocl("return sin(acos(q));", "sas_sin_arccos"), + limits=(0, 1), +) +add_function( + name="sin_from_cos", + mp_function=lambda x: (mp.sqrt(1 - x*x)), + np_function=lambda x: (np.sqrt(1-x*x)), + ocl_function=make_ocl("return sqrt(1.-q*q);", "sas_sin_from_cos"), + limits=(0, 1), +) + ALL_FUNCTIONS = set(FUNCTIONS.keys()) ALL_FUNCTIONS.discard("loggamma") # use cephes-based gammaln instead ALL_FUNCTIONS.discard("3j1/x:taylor") diff --git a/sasmodels/compare.py b/sasmodels/compare.py index 26b67ada8..847135d44 100755 --- a/sasmodels/compare.py +++ b/sasmodels/compare.py @@ -74,8 +74,8 @@ def __call__(self, **par: float) -> np.ndarray: ... -zero indicates that q=0 should be included === model parameters === - -preset*/-random[=seed] preset or random parameters - -sets=n generates n random datasets with the seed given by -random=seed + -preset*/-random/-seed=k preset or random parameters + -sets=n generates n random datasets -pars/-nopars* prints the parameter set or not -sphere[=150] set up spherical integration over theta/phi using n points -mono*/-poly suppress or allow polydispersity on generated parameters @@ -89,9 +89,9 @@ def __call__(self, **par: float) -> np.ndarray: ... -ngauss=0 overrides the number of points in the 1-D gaussian quadrature === precision options === - -engine=default uses the default calcution precision - -single/-double/-half/-fast sets an OpenCL calculation engine - -single!/-double!/-quad! sets an OpenMP calculation engine + -single/-double/-half/-fast sets a GPU calculation engine + -single!/-double!/-quad! sets a CPU calculation engine + -engine=a,b compare I(q) calculated with engine a and b === plotting === -plot*/-noplot plots or suppress the plot of the model @@ -1049,7 +1049,7 @@ def plot_models(opts, result, limits=None, setnum=0): '2d', '1d', 'sesans', # Parameter set - 'preset', 'random', 'random=', 'sets=', + 'preset', 'random', 'random=', 'sets=', 'seed=', 'nopars', 'pars', 'sphere', 'sphere=', # integrate over a sphere in 2d with n points 'poly', 'mono', @@ -1253,6 +1253,7 @@ def parse_opts(argv): elif arg.startswith('-res='): opts['res'] = arg[5:] elif arg.startswith('-noise='): opts['noise'] = float(arg[7:]) elif arg.startswith('-sets='): opts['sets'] = int(arg[6:]) + elif arg.startswith('-seed='): opts['seed'] = int(arg[6:]) elif arg.startswith('-accuracy='): opts['accuracy'] = arg[10:] elif arg.startswith('-cutoff='): opts['cutoff'] = arg[8:] elif arg.startswith('-title='): opts['title'] = arg[7:] @@ -1330,6 +1331,10 @@ def parse_opts(argv): if PAR_SPLIT in opts['ngauss']: opts['ngauss'] = [int(k) for k in opts['ngauss'].split(PAR_SPLIT, 2)] + # TODO: change set_integration to be non-stateful so we can put adaptive after fixed + if opts['ngauss'][1] == 0: + a, b = opts['ngauss'] + raise ValueError(f"Use -ngauss={b},{a} rather than -ngauss={a},{b}") comparison = True else: opts['ngauss'] = [int(opts['ngauss'])]*2 diff --git a/sasmodels/direct_model.py b/sasmodels/direct_model.py index 7c1004ff2..e9c133c6b 100644 --- a/sasmodels/direct_model.py +++ b/sasmodels/direct_model.py @@ -453,14 +453,18 @@ def test_reparameterize(): except Exception: pass -def _direct_calculate(model, data, pars): +def _direct_calculate(model, data, pars, ngauss=0): from .core import build_model, load_model_info + from .generate import set_integration_size + model_info = load_model_info(model) + if ngauss != 0: + set_integration_size(model_info, ngauss) kernel = build_model(model_info) calculator = DirectModel(data, kernel) return calculator(**pars) -def Iq(model, q, dq=None, ql=None, qw=None, **pars): +def Iq(model, q, dq=None, ql=None, qw=None, ngauss=0, **pars): """ Compute I(q) for *model*. Resolution is *dq* for pinhole or *ql* and *qw* for slit geometry. Use 0 or None for infinite slits. @@ -498,16 +502,16 @@ def broadcast(v): else np.full(len(q), v) if np.isscalar(v) else _as_numpy(v)) data.dxl, data.dxw = broadcast(ql), broadcast(qw) - return _direct_calculate(model, data, pars) + return _direct_calculate(model, data, pars, ngauss=ngauss) -def Iqxy(model, qx, qy, dqx=None, dqy=None, **pars): +def Iqxy(model, qx, qy, dqx=None, dqy=None, ngauss=0, **pars): """ Compute I(qx, qy) for *model*. Resolution is *dqx* and *dqy*. See :func:`Iq` for details on model and parameters. """ from .data import Data2D data = Data2D(x=qx, y=qy, dx=dqx, dy=dqy) - return _direct_calculate(model, data, pars) + return _direct_calculate(model, data, pars, ngauss=ngauss) def Gxi(model, xi, **pars): """ @@ -528,6 +532,8 @@ def main(): if len(sys.argv) < 3: print("usage: python -m sasmodels.direct_model modelname (q|qx,qy) par=val ...") sys.exit(1) + + ngauss = 0 model = sys.argv[1] call = sys.argv[2].upper() pars = dict((k, (float(v) if not k.endswith("_pd_type") else v)) @@ -542,13 +548,13 @@ def main(): dq = dqw = dql = None #dq = [q*0.05] # 5% pinhole resolution dqw, dql = [q*0.05], [1.0] # 5% horizontal slit resolution - print(Iq(model, [q], dq=dq, qw=dqw, ql=dql, **pars)[0]) + print(Iq(model, [q], dq=dq, qw=dqw, ql=dql, ngauss=ngauss, **pars)[0]) #print(Gxi(model, [q], **pars)[0]) elif len(values) == 2: qx, qy = values dq = None #dq = [0.005] # 5% pinhole resolution at q = 0.1 - print(Iqxy(model, [qx], [qy], dqx=dq, dqy=dq, **pars)[0]) + print(Iqxy(model, [qx], [qy], dqx=dq, dqy=dq, ngauss=ngauss, **pars)[0]) else: print("use q or qx,qy") sys.exit(1) diff --git a/sasmodels/generate.py b/sasmodels/generate.py index 0ae362d69..e0c32ea2a 100644 --- a/sasmodels/generate.py +++ b/sasmodels/generate.py @@ -281,7 +281,13 @@ def get_data_path(external_dir, target_file): %(docs)s """ - +# TODO: set_integration_size should return a copy of info +# For now it is set up so that the first call converts adaptive to non-adaptive +# and subsequent calls convert number of gauss points for the non-adaptive. +# There is currently no way to go back to adaptive after call set_integration_size. +# If model_info is cached this will cause problems for a GUI or a notebook where the +# user specifies n_gauss=0 for adaptive. This shows up with sasmodels.compare where +# -n_gauss=0,76 shows differences but -n_gauss=76,0 shows none. def set_integration_size(info, n): # type: (ModelInfo, int) -> None """ @@ -291,13 +297,27 @@ def set_integration_size(info, n): Note: this really ought to be a method in modelinfo, but that leads to import loops. """ - if info.source and any(lib.startswith('lib/gauss') for lib in info.source): - from .gengauss import gengauss - path = joinpath(MODEL_PATH, "lib", "gauss%d.c"%n) - if not exists(path): - gengauss(n, path) - info.source = ["lib/gauss%d.c"%n if lib.startswith('lib/gauss') - else lib for lib in info.source] + from .gengauss import gengauss + + if not info.source: + return + + # Generate the integration points + path = joinpath(MODEL_PATH, "lib", f"gauss{n}.c") + if not exists(path): + # print(f"building Gaussian integration points of size {n} in {str(path)}") + gengauss(n, path) + + # Replace adaptive.c or lib/gauss.c + try: + index = info.source.index("lib/adaptive.c") + info.source[index:index+1] = [f"lib/gauss{n}.c", "lib/nonadaptive.c"] + except ValueError: + for index in range(len(info.source)-1, -1, -1): + if info.source[index].startswith("lib/gauss"): + info.source[index] = f"lib/gauss{n}.c" + break + # print("info.source is now", info.source) def format_units(units): # type: (str) -> str diff --git a/sasmodels/gengauss.py b/sasmodels/gengauss.py index a9ce027f4..2ef063133 100755 --- a/sasmodels/gengauss.py +++ b/sasmodels/gengauss.py @@ -21,19 +21,18 @@ def gengauss(n, path): array_size = n with open(path, "w") as fid: - fid.write("""\ -// Generated by sasmodels.gengauss.gengauss(%d) + fid.write(f"""\ +// Generated by sasmodels.gengauss.gengauss({n}) #ifdef GAUSS_N # undef GAUSS_N # undef GAUSS_Z # undef GAUSS_W #endif -#define GAUSS_N %d -#define GAUSS_Z Gauss%dZ -#define GAUSS_W Gauss%dWt - -"""%(n, n, n, n)) +#define GAUSS_N {n} +#define GAUSS_Z Gauss{n}Z +#define GAUSS_W Gauss{n}Wt +""") if array_size != n: fid.write("// Note: using array size %d so that it is a multiple of 4\n\n"%array_size) diff --git a/sasmodels/model_test.py b/sasmodels/model_test.py index 390df7454..80fec08ef 100755 --- a/sasmodels/model_test.py +++ b/sasmodels/model_test.py @@ -579,6 +579,31 @@ def _build_test(test): for test in tests: yield _build_test(test) +def _generate_target_values(modelname, ngauss=0): + from .generate import set_integration_size + + model_info = load_model_info(modelname) + if ngauss != 0: + set_integration_size(model_info, ngauss) + model = build_model(model_info, platform="dll", dtype="d") + + for pars, q, Iq in model_info.tests: + qin = q + if isinstance(Iq, float): + q, Iq = [q], [Iq] + if isinstance(q[0], tuple): + qx, qy = zip(*q) + q_vectors = [np.array(qx), np.array(qy)] + else: + q_vectors = [np.array(q)] + kernel = model.make_kernel(q_vectors) + target = np.array(Iq) + actual = call_kernel(kernel, pars) + if True or (actual != target).any(): + print("Test:", modelname, pars) + print(" q = ", qin) + print(f" current => [{', '.join(f'{v:.15g}' for v in target)}]") + print(f" ngauss={ngauss} => [{', '.join(f'{v:.15g}' for v in actual)}]") def main(): # type: () -> int @@ -601,6 +626,11 @@ def main(): help="Engines on which to run the test. " "Valid values are opencl, cuda, dll, and all. " "Defaults to all if no value is given") + parser.add_argument("-t", "--targets", action="store_true", + help="Generate target values for test.") + parser.add_argument("--ngauss", type=int, default=10000, + help="Number of gauss points to use in integration for " + "target values. Warning: this is very slow the first time.") parser.add_argument("models", nargs="*", help="The names of the models to be tested. " "If the first model is 'all', then all but the listed " @@ -630,9 +660,13 @@ def main(): print("unknown engine " + opts.engine) return 1 - runner = TestRunner(verbosity=opts.verbose, **test_args) - result = runner.run(make_suite(loaders, opts.models)) - return 1 if result.failures or result.errors else 0 + if opts.targets: + for model in opts.models: + _generate_target_values(model, ngauss=opts.ngauss) + else: + runner = TestRunner(verbosity=opts.verbose, **test_args) + result = runner.run(make_suite(loaders, opts.models)) + return 1 if result.failures or result.errors else 0 if __name__ == "__main__": diff --git a/sasmodels/models/barbell.c b/sasmodels/models/barbell.c index 87f1553e3..e8463fed0 100644 --- a/sasmodels/models/barbell.c +++ b/sasmodels/models/barbell.c @@ -1,7 +1,7 @@ //barbell kernel - same as dumbbell static double _bell_kernel(double qab, double qc, double h, double radius_bell, - double half_length) + double half_length, int outer_n) { // translate a point in [-1,1] to a point in [lower,upper] const double upper = 1.0; @@ -19,13 +19,18 @@ _bell_kernel(double qab, double qc, double h, double radius_bell, const double m = radius_bell*qc; // cos argument slope const double b = (half_length+h)*qc; // cos argument intercept const double qab_r = radius_bell*qab; // Q*R*sin(theta) + + const double qr_max = fmax(qab_r, m+b); + constant double *w, *z; + int n = gauss_weights(qr_max, outer_n, &w, &z); + double total = 0.0; - for (int i = 0; i < GAUSS_N; i++){ - const double t = GAUSS_Z[i]*zm + zb; + for (int i = 0; i < n; i++){ + const double t = z[i]*zm + zb; const double radical = 1.0 - t*t; const double bj = sas_2J1x_x(qab_r*sqrt(radical)); const double Fq = cos(m*t + b) * radical * bj; - total += GAUSS_W[i] * Fq; + total += w[i] * Fq; } // translate dx in [-1,1] to dx in [lower,upper] const double integral = total*zm; @@ -35,9 +40,9 @@ _bell_kernel(double qab, double qc, double h, double radius_bell, static double _fq(double qab, double qc, double h, - double radius_bell, double radius, double half_length) + double radius_bell, double radius, double half_length, int n_outer) { - const double bell_fq = _bell_kernel(qab, qc, h, radius_bell, half_length); + const double bell_fq = _bell_kernel(qab, qc, h, radius_bell, half_length, n_outer); const double bj = sas_2J1x_x(radius*qab); const double si = sas_sinx_x(half_length*qc); const double cyl_fq = 2.0*M_PI*radius*radius*half_length*bj*si; @@ -110,21 +115,32 @@ Fq(double q,double *F1, double *F2, double sld, double solvent_sld, const double h = sqrt(square(radius_bell) - square(radius)); const double half_length = 0.5*length; + // The term h comes from solving the right triangle with diagonal + // equal to the bell radius and horizontal equal to the bar radius. + // The result is the height of the equator above the end of the rod. + // To get the total length of bar+bell use bar length + 2*(bell radius + h). + // We want the radius, so divide that by two. + const double qr_max = q*(half_length + radius_bell + h); + constant double *w_outer, *z_outer; + // Keep outer loop to 76 or less + int n_outer = gauss_weights(qr_max, ADAPTIVE_MAX_76, &w_outer, &z_outer); + // translate a point in [-1,1] to a point in [0, pi/2] const double zm = M_PI_4; const double zb = M_PI_4; double total_F1 = 0.0; double total_F2 = 0.0; - for (int i = 0; i < GAUSS_N; i++){ - const double theta = GAUSS_Z[i]*zm + zb; + for (int i = 0; i < n_outer; i++){ + const double theta = z_outer[i]*zm + zb; double sin_theta, cos_theta; // slots to hold sincos function output SINCOS(theta, sin_theta, cos_theta); const double qab = q*sin_theta; const double qc = q*cos_theta; - const double Aq = _fq(qab, qc, h, radius_bell, radius, half_length); + // Don't constrain size of inner loop to n_outer + const double Aq = _fq(qab, qc, h, radius_bell, radius, half_length, 1); // scale by sin_theta for spherical coord integration - total_F1 += GAUSS_W[i] * Aq * sin_theta; - total_F2 += GAUSS_W[i] * Aq * Aq * sin_theta; + total_F1 += w_outer[i] * Aq * sin_theta; + total_F2 += w_outer[i] * Aq * Aq * sin_theta; } // translate dx in [-1,1] to dx in [lower,upper] const double form_avg = total_F1 * zm; @@ -141,8 +157,10 @@ Iqac(double qab, double qc, double sld, double solvent_sld, double radius_bell, double radius, double length) { + // TODO: For 2D data we may also want to limit the size of the integral at each (qx, qy) + const int n_outer = 1; // No limits on inner integral const double h = sqrt(square(radius_bell) - square(radius)); - const double Aq = _fq(qab, qc, h, radius_bell, radius, 0.5*length); + const double Aq = _fq(qab, qc, h, radius_bell, radius, 0.5*length, n_outer); // Multiply by contrast^2 and convert to cm-1 const double s = (sld - solvent_sld); diff --git a/sasmodels/models/barbell.py b/sasmodels/models/barbell.py index c29b7954d..13b4be74e 100644 --- a/sasmodels/models/barbell.py +++ b/sasmodels/models/barbell.py @@ -117,7 +117,7 @@ ] # pylint: enable=bad-whitespace, line-too-long -source = ["lib/polevl.c", "lib/sas_J1.c", "lib/gauss76.c", "barbell.c"] +source = ["lib/polevl.c", "lib/sas_J1.c", "lib/adaptive.c", "barbell.c"] valid = "radius_bell >= radius" have_Fq = True radius_effective_modes = [ diff --git a/sasmodels/models/capped_cylinder.c b/sasmodels/models/capped_cylinder.c index c792574cb..0132fc716 100644 --- a/sasmodels/models/capped_cylinder.c +++ b/sasmodels/models/capped_cylinder.c @@ -7,8 +7,8 @@ // length is the cylinder length, or the separation between the lens halves // theta is the angle of the cylinder wrt q. static double -_cap_kernel(double qab, double qc, double h, double radius_cap, - double half_length) +_cap_kernel(double qab, double qc, double h, double radius_cap, double radius, + double half_length, int outer_n) { // translate a point in [-1,1] to a point in [lower,upper] const double upper = 1.0; @@ -26,13 +26,25 @@ _cap_kernel(double qab, double qc, double h, double radius_cap, const double m = radius_cap*qc; // cos argument slope const double b = (half_length+h)*qc; // cos argument intercept const double qab_r = radius_cap*qab; // Q*R*sin(theta) + + // m+b = qc*(half_length + radius_cap + h). With h in [-radius_cap, 0] depending + // on cylinder radius, that means m+b is in qc*[length/2, length_2 + radius_cap]. + // The qab_r term will be very large for mostly flat caps. Since the bj term will + // oscillate at this frequency, it seems like we should increase the number of + // gauss points to accomodate. However, if we use the radius of the cylinder + // we seem to get good results, so use that to set the number of integration points. + //const double qr_max = fmax(qab_r, m+b); + const double qr_max = fmax(qab*radius, m+b); + constant double *w, *z; + int n = gauss_weights(qr_max, outer_n, &w, &z); + double total = 0.0; - for (int i=0; i [0, 1] const double m = 0.5; const double b = 0.5; double total_F1 = 0.0; //initialize intergral double total_F2 = 0.0; //initialize intergral - for(int i=0;i [0, 1] +// ... +// const double *w_inner, *z_inner; +// const int n_inner = gauss_weights(qr_inner, n_outer, &w_inner, &z_inner); +// double total_inner = 0.; +// for (int k=0; k < n_inner; k++) { +// phi = M_PI*(z_inner[k] + 1.0); // [-1, 1] => [0, 2 pi] +// ... +// total_inner += w_inner[k]*Fq; +// } +// total_outer += w_outer[k]*total_inner; +// } +// total_outer /= 2.0*M_PI; // correct for dφ = 2πw/2, du = w/2 +// +// Some models (barbell, capped cylinder, pringle) are not using spherical integration. +// Instead we find that limiting the outer integral to 76 points leads to okay results: +// +// n_outer = gauss_weights(qr_outer, ADAPTIVE_MAX_76, &w_outer, &z_outer); +// +#define _ADAPTIVE_MAX_N 100000 +#define ADAPTIVE_MAX_76 (_ADAPTIVE_MAX_N / 76) +#define ADAPTIVE_MAX_OUTER (_ADAPTIVE_MAX_N / 500) +static int +gauss_weights(double qr, int outer_n, constant double **w, constant double **z) +{ + // TODO: add more stages (150, 1500) so the n^2 model slowdown is a little less brutal + // TODO: adjust cutoff for n=500 + // For the pringle model I get npoints = 0.365 qr_max + 43 + // and empirical values 14 => 20, 120 => 76, 1200 => 500, 14500 => 5000 + // The error threshhold is abrupt: if n is too low the result is bad, but + // there is little benefit for having too large an n. + // These results are specific to the pringle inner integral and may not hold in general. + // Check that all models have reasonable cutoff. + // *w = Gauss5000Wt; *z = Gauss5000Z; return 5000; // max precision + // *w = Gauss76Wt; *z = Gauss76Z; return 76; // default + + if (qr < 10 || outer_n > _ADAPTIVE_MAX_N / 76) { + *w = Gauss20Wt; *z = Gauss20Z; return 20; + } else if (qr < 100 || outer_n > _ADAPTIVE_MAX_N / 500) { + *w = Gauss76Wt; *z = Gauss76Z; return 76; + } else if (qr < 750 || outer_n > _ADAPTIVE_MAX_N / 5000) { + *w = Gauss500Wt; *z = Gauss500Z; return 500; + } else { + *w = Gauss5000Wt; *z = Gauss5000Z; return 5000; + } +} \ No newline at end of file diff --git a/sasmodels/models/lib/nonadaptive.c b/sasmodels/models/lib/nonadaptive.c new file mode 100644 index 000000000..8c1715ba8 --- /dev/null +++ b/sasmodels/models/lib/nonadaptive.c @@ -0,0 +1,13 @@ +// To force a fixed rather than adaptive integration scheme, replace [..., "lib/adaptive.c", ...] +// with [..., "lib/gauss.c", "lib/nonadaptive.c", ...] in your source lists. + +// Hack for barbell and capped cylinder keeps the outer integral to 76 points or fewer +// by calling gauss_weights with n_outer=ADAPTIVE_MAX_76 +#define ADAPTIVE_MAX_76 1 // Doesn't matter since n_outer is ignored +#define ADAPTIVE_MAX_OUTER 1 // Doesn't matter since n_outer is ignored + +static int +gauss_weights(double qr, int n_outer, constant double **w, constant double **z) +{ + *w = GAUSS_W; *z = GAUSS_Z; return GAUSS_N; +} \ No newline at end of file diff --git a/sasmodels/models/parallelepiped.c b/sasmodels/models/parallelepiped.c index 7b5262b6d..d3f041c78 100644 --- a/sasmodels/models/parallelepiped.c +++ b/sasmodels/models/parallelepiped.c @@ -75,34 +75,42 @@ Fq(double q, const double a_scaled = length_a / length_b; const double c_scaled = length_c / length_b; + const double qr_max_outer = q*fmax(sqrt(length_a*length_a + length_b*length_b), length_c)/2; + constant double *w_outer, *z_outer; + int n_outer = gauss_weights(qr_max_outer, ADAPTIVE_MAX_OUTER, &w_outer, &z_outer); + // outer integral (with gauss points), integration limits = 0, 1 double outer_total_F1 = 0.0; //initialize integral double outer_total_F2 = 0.0; //initialize integral - for( int i=0; i0; n--) { - _integrate_bessel(radius, alpha, beta, q_sin_psi, q_cos_psi, n, &Sn, &Cn); + _integrate_bessel(radius, alpha, beta, q_sin_psi, q_cos_psi, n, &Sn, &Cn, n_outer); sum += 2.0*(Sn*Sn + Cn*Cn); } - _integrate_bessel(radius, alpha, beta, q_sin_psi, q_cos_psi, 0, &Sn, &Cn); + _integrate_bessel(radius, alpha, beta, q_sin_psi, q_cos_psi, 0, &Sn, &Cn, n_outer); sum += Sn*Sn+ Cn*Cn; return sum; } @@ -80,19 +86,26 @@ double _integrate_psi( double alpha, double beta) { + const double qhalf_thickness = q*0.5*thickness; + const double qr_max = fmax(qhalf_thickness, q*radius); + constant double *w_outer, *z_outer; + // Keep outer loop to 76 or less + int n_outer = gauss_weights(qr_max, ADAPTIVE_MAX_76, &w_outer, &z_outer); + // printf("qr_max=%.1f n=%d\n", qr_max, n_outer); + // translate gauss point z in [-1,1] to a point in [0, pi/2] const double zm = M_PI_4; const double zb = M_PI_4; double sum = 0.0; - for (int i = 0; i < GAUSS_N; i++) { - double psi = GAUSS_Z[i]*zm + zb; + for (int i = 0; i < n_outer; i++) { + double psi = z_outer[i]*zm + zb; double sin_psi, cos_psi; SINCOS(psi, sin_psi, cos_psi); - double bessel_term = _sum_bessel_orders(radius, alpha, beta, q*sin_psi, q*cos_psi); - double sinc_term = square(sas_sinx_x(q * thickness * cos_psi / 2.0)); + double bessel_term = _sum_bessel_orders(radius, alpha, beta, q*sin_psi, q*cos_psi, n_outer); + double sinc_term = square(sas_sinx_x(qhalf_thickness * cos_psi)); double pringle_kernel = 4.0 * sin_psi * bessel_term * sinc_term; - sum += GAUSS_W[i] * pringle_kernel; + sum += w_outer[i] * pringle_kernel; } return zm * sum; diff --git a/sasmodels/models/pringle.py b/sasmodels/models/pringle.py index d086245a4..d7207ef5a 100644 --- a/sasmodels/models/pringle.py +++ b/sasmodels/models/pringle.py @@ -76,7 +76,7 @@ source = ["lib/polevl.c", "lib/sas_J0.c", "lib/sas_J1.c", - "lib/sas_JN.c", "lib/gauss76.c", "pringle.c"] + "lib/sas_JN.c", "lib/adaptive.c", "pringle.c"] radius_effective_modes = [ "equivalent cylinder excluded volume", "equivalent volume sphere", diff --git a/sasmodels/models/rectangular_prism.c b/sasmodels/models/rectangular_prism.c index b75a7470b..473def35c 100644 --- a/sasmodels/models/rectangular_prism.c +++ b/sasmodels/models/rectangular_prism.c @@ -36,6 +36,8 @@ radius_effective(int mode, double length_a, double b2a_ratio, double c2a_ratio) } } +// TODO: why is there a separate Iq and Fq? +/* static double Iq(double q, double sld, @@ -98,6 +100,7 @@ Iq(double q, return answer; } +*/ static void Fq(double q, @@ -115,6 +118,10 @@ Fq(double q, const double b_half = 0.5 * length_b; const double c_half = 0.5 * length_c; + const double qr_max_outer = q*fmax(sqrt(length_a*length_a + length_b*length_b), length_c)/2; + constant double *w_outer, *z_outer; + int n_outer = gauss_weights(qr_max_outer, ADAPTIVE_MAX_OUTER, &w_outer, &z_outer); + //Integration limits to use in Gaussian quadrature const double v1a = 0.0; const double v1b = M_PI_2; //theta integration limits @@ -123,17 +130,21 @@ Fq(double q, double outer_sum_F1 = 0.0; double outer_sum_F2 = 0.0; - for(int i=0; i (Ra, Rb) so that the integration will be more accurate. + const double maybe_min = fmin(Ra, Rb); + const double maybe_max = fmax(Ra, Rb); + const double maybe_mid = fmax(maybe_min, Rc); + const double radius_equat_minor = fmin(maybe_min, Rc); + const double radius_equat_major = fmin(maybe_max, maybe_mid); + const double radius_polar = fmax(maybe_max, maybe_mid); + const double pa = square(radius_equat_minor/radius_equat_major) - 1.0; const double pc = square(radius_polar/radius_equat_major) - 1.0; + + const double qr_max_inner = fmax(q*radius_equat_minor, q*radius_equat_major); + const double qr_max = fmax(qr_max_inner, q*radius_polar); + constant double *z_outer, *w_outer; + int n_outer = gauss_weights(qr_max, ADAPTIVE_MAX_OUTER, &w_outer, &z_outer); + + constant double *z_inner, *w_inner; + int n_inner = gauss_weights(qr_max_inner, n_outer, &w_inner, &z_inner); + // printf("outer: qr=%g n=%d inner: qr=%g n=%d\n", qr_max, n_outer, qr_max_inner, n_inner); + // translate a point in [-1,1] to a point in [0, pi/2] const double zm = M_PI_4; const double zb = M_PI_4; double outer_sum_F1 = 0.0; double outer_sum_F2 = 0.0; - for (int i=0;i -#include +//#include +//#include //truncated octahedron volume // NOTE: needs to be called form_volume() for a shape category