PythonExtra/py/builtinimport.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

689 lines
28 KiB
C
Raw Normal View History

/*
* This file is part of the MicroPython project, http://micropython.org/
*
* The MIT License (MIT)
*
* Copyright (c) 2013-2019 Damien P. George
* Copyright (c) 2014 Paul Sokolovsky
* Copyright (c) 2021 Jim Mussared
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "py/compile.h"
#include "py/objmodule.h"
#include "py/persistentcode.h"
#include "py/runtime.h"
#include "py/builtin.h"
#include "py/frozenmod.h"
#if MICROPY_DEBUG_VERBOSE // print debugging info
#define DEBUG_PRINT (1)
#define DEBUG_printf DEBUG_printf
#else // don't print debugging info
#define DEBUG_PRINT (0)
#define DEBUG_printf(...) (void)0
#endif
#if MICROPY_ENABLE_EXTERNAL_IMPORT
// Must be a string of one byte.
#define PATH_SEP_CHAR "/"
// Virtual sys.path entry that maps to the frozen modules.
#define MP_FROZEN_PATH_PREFIX ".frozen/"
// Wrapper for mp_import_stat (which is provided by the port, and typically
// uses mp_vfs_import_stat) to also search frozen modules. Given an exact
// path to a file or directory (e.g. "foo/bar", foo/bar.py" or "foo/bar.mpy"),
// will return whether the path is a file, directory, or doesn't exist.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
STATIC mp_import_stat_t stat_path(const char *path) {
#if MICROPY_MODULE_FROZEN
// Only try and load as a frozen module if it starts with .frozen/.
const int frozen_path_prefix_len = strlen(MP_FROZEN_PATH_PREFIX);
if (strncmp(path, MP_FROZEN_PATH_PREFIX, frozen_path_prefix_len) == 0) {
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Just stat (which is the return value), don't get the data.
return mp_find_frozen_module(path + frozen_path_prefix_len, NULL, NULL);
}
#endif
return mp_import_stat(path);
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Stat a given filesystem path to a .py file. If the file does not exist,
// then attempt to stat the corresponding .mpy file, and update the path
// argument. This is the logic that makes .py files take precedent over .mpy
// files. This uses stat_path above, rather than mp_import_stat directly, so
// that the .frozen path prefix is handled.
STATIC mp_import_stat_t stat_file_py_or_mpy(vstr_t *path) {
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
mp_import_stat_t stat = stat_path(vstr_null_terminated_str(path));
if (stat == MP_IMPORT_STAT_FILE) {
return stat;
}
#if MICROPY_PERSISTENT_CODE_LOAD
// Didn't find .py -- try the .mpy instead by inserting an 'm' into the '.py'.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Note: There's no point doing this if it's a frozen path, but adding the check
// would be extra code, and no harm letting mp_find_frozen_module fail instead.
vstr_ins_byte(path, path->len - 2, 'm');
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
stat = stat_path(vstr_null_terminated_str(path));
if (stat == MP_IMPORT_STAT_FILE) {
return stat;
}
#endif
return MP_IMPORT_STAT_NO_EXIST;
}
// Given an import path (e.g. "foo/bar"), try and find "foo/bar" (a directory)
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// or "foo/bar.(m)py" in either the filesystem or frozen modules. If the
// result is a file, the path argument will be updated to include the file
// extension.
STATIC mp_import_stat_t stat_module(vstr_t *path) {
mp_import_stat_t stat = stat_path(vstr_null_terminated_str(path));
DEBUG_printf("stat %s: %d\n", vstr_str(path), stat);
if (stat == MP_IMPORT_STAT_DIR) {
return stat;
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Not a directory, add .py and try as a file.
vstr_add_str(path, ".py");
return stat_file_py_or_mpy(path);
}
// Given a top-level module name, try and find it in each of the sys.path
// entries. Note: On success, the dest argument will be updated to the matching
// path (i.e. "<entry>/mod_name(.py)").
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
STATIC mp_import_stat_t stat_top_level(qstr mod_name, vstr_t *dest) {
DEBUG_printf("stat_top_level: '%s'\n", qstr_str(mod_name));
#if MICROPY_PY_SYS
size_t path_num;
mp_obj_t *path_items;
mp_obj_get_array(mp_sys_path, &path_num, &path_items);
// go through each sys.path entry, trying to import "<entry>/<mod_name>".
for (size_t i = 0; i < path_num; i++) {
vstr_reset(dest);
size_t p_len;
const char *p = mp_obj_str_get_data(path_items[i], &p_len);
if (p_len > 0) {
// Add the path separator (unless the entry is "", i.e. cwd).
vstr_add_strn(dest, p, p_len);
vstr_add_char(dest, PATH_SEP_CHAR[0]);
}
vstr_add_str(dest, qstr_str(mod_name));
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
mp_import_stat_t stat = stat_module(dest);
if (stat != MP_IMPORT_STAT_NO_EXIST) {
return stat;
}
}
// sys.path was empty or no matches, do not search the filesystem or
// frozen code.
return MP_IMPORT_STAT_NO_EXIST;
#else
// mp_sys_path is not enabled, so just stat the given path directly.
vstr_add_str(dest, qstr_str(mod_name));
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
return stat_module(dest);
#endif
}
#if MICROPY_MODULE_FROZEN_STR || MICROPY_ENABLE_COMPILER
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
STATIC void do_load_from_lexer(mp_module_context_t *context, mp_lexer_t *lex) {
#if MICROPY_PY___FILE__
qstr source_name = lex->source_name;
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
mp_store_attr(MP_OBJ_FROM_PTR(&context->module), MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
#endif
// parse, compile and execute the module in its context
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
mp_obj_dict_t *mod_globals = context->module.globals;
mp_parse_compile_execute(lex, MP_PARSE_FILE_INPUT, mod_globals, mod_globals);
}
#endif
#if (MICROPY_HAS_FILE_READER && MICROPY_PERSISTENT_CODE_LOAD) || MICROPY_MODULE_FROZEN_MPY
STATIC void do_execute_raw_code(const mp_module_context_t *context, const mp_raw_code_t *rc, qstr source_name) {
#if MICROPY_PY___FILE__
mp_store_attr(MP_OBJ_FROM_PTR(&context->module), MP_QSTR___file__, MP_OBJ_NEW_QSTR(source_name));
#else
(void)source_name;
#endif
// execute the module in its context
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
mp_obj_dict_t *mod_globals = context->module.globals;
// save context
nlr_jump_callback_node_globals_locals_t ctx;
ctx.globals = mp_globals_get();
ctx.locals = mp_locals_get();
// set new context
mp_globals_set(mod_globals);
mp_locals_set(mod_globals);
// set exception handler to restore context if an exception is raised
nlr_push_jump_callback(&ctx.callback, mp_globals_locals_set_from_nlr_jump_callback);
// make and execute the function
mp_obj_t module_fun = mp_make_function_from_raw_code(rc, context, NULL);
mp_call_function_0(module_fun);
// deregister exception handler and restore context
nlr_pop_jump_callback(true);
}
#endif
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
STATIC void do_load(mp_module_context_t *module_obj, vstr_t *file) {
#if MICROPY_MODULE_FROZEN || MICROPY_ENABLE_COMPILER || (MICROPY_PERSISTENT_CODE_LOAD && MICROPY_HAS_FILE_READER)
const char *file_str = vstr_null_terminated_str(file);
#endif
// If we support frozen modules (either as str or mpy) then try to find the
// requested filename in the list of frozen module filenames.
#if MICROPY_MODULE_FROZEN
void *modref;
int frozen_type;
const int frozen_path_prefix_len = strlen(MP_FROZEN_PATH_PREFIX);
if (strncmp(file_str, MP_FROZEN_PATH_PREFIX, frozen_path_prefix_len) == 0) {
mp_find_frozen_module(file_str + frozen_path_prefix_len, &frozen_type, &modref);
// If we support frozen str modules and the compiler is enabled, and we
// found the filename in the list of frozen files, then load and execute it.
#if MICROPY_MODULE_FROZEN_STR
if (frozen_type == MP_FROZEN_STR) {
do_load_from_lexer(module_obj, modref);
return;
}
#endif
// If we support frozen mpy modules and we found a corresponding file (and
// its data) in the list of frozen files, execute it.
#if MICROPY_MODULE_FROZEN_MPY
if (frozen_type == MP_FROZEN_MPY) {
py: Rework bytecode and .mpy file format to be mostly static data. Background: .mpy files are precompiled .py files, built using mpy-cross, that contain compiled bytecode functions (and can also contain machine code). The benefit of using an .mpy file over a .py file is that they are faster to import and take less memory when importing. They are also smaller on disk. But the real benefit of .mpy files comes when they are frozen into the firmware. This is done by loading the .mpy file during compilation of the firmware and turning it into a set of big C data structures (the job of mpy-tool.py), which are then compiled and downloaded into the ROM of a device. These C data structures can be executed in-place, ie directly from ROM. This makes importing even faster because there is very little to do, and also means such frozen modules take up much less RAM (because their bytecode stays in ROM). The downside of frozen code is that it requires recompiling and reflashing the entire firmware. This can be a big barrier to entry, slows down development time, and makes it harder to do OTA updates of frozen code (because the whole firmware must be updated). This commit attempts to solve this problem by providing a solution that sits between loading .mpy files into RAM and freezing them into the firmware. The .mpy file format has been reworked so that it consists of data and bytecode which is mostly static and ready to run in-place. If these new .mpy files are located in flash/ROM which is memory addressable, the .mpy file can be executed (mostly) in-place. With this approach there is still a small amount of unpacking and linking of the .mpy file that needs to be done when it's imported, but it's still much better than loading an .mpy from disk into RAM (although not as good as freezing .mpy files into the firmware). The main trick to make static .mpy files is to adjust the bytecode so any qstrs that it references now go through a lookup table to convert from local qstr number in the module to global qstr number in the firmware. That means the bytecode does not need linking/rewriting of qstrs when it's loaded. Instead only a small qstr table needs to be built (and put in RAM) at import time. This means the bytecode itself is static/constant and can be used directly if it's in addressable memory. Also the qstr string data in the .mpy file, and some constant object data, can be used directly. Note that the qstr table is global to the module (ie not per function). In more detail, in the VM what used to be (schematically): qst = DECODE_QSTR_VALUE; is now (schematically): idx = DECODE_QSTR_INDEX; qst = qstr_table[idx]; That allows the bytecode to be fixed at compile time and not need relinking/rewriting of the qstr values. Only qstr_table needs to be linked when the .mpy is loaded. Incidentally, this helps to reduce the size of bytecode because what used to be 2-byte qstr values in the bytecode are now (mostly) 1-byte indices. If the module uses the same qstr more than two times then the bytecode is smaller than before. The following changes are measured for this commit compared to the previous (the baseline): - average 7%-9% reduction in size of .mpy files - frozen code size is reduced by about 5%-7% - importing .py files uses about 5% less RAM in total - importing .mpy files uses about 4% less RAM in total - importing .py and .mpy files takes about the same time as before The qstr indirection in the bytecode has only a small impact on VM performance. For stm32 on PYBv1.0 the performance change of this commit is: diff of scores (higher is better) N=100 M=100 baseline -> this-commit diff diff% (error%) bm_chaos.py 371.07 -> 357.39 : -13.68 = -3.687% (+/-0.02%) bm_fannkuch.py 78.72 -> 77.49 : -1.23 = -1.563% (+/-0.01%) bm_fft.py 2591.73 -> 2539.28 : -52.45 = -2.024% (+/-0.00%) bm_float.py 6034.93 -> 5908.30 : -126.63 = -2.098% (+/-0.01%) bm_hexiom.py 48.96 -> 47.93 : -1.03 = -2.104% (+/-0.00%) bm_nqueens.py 4510.63 -> 4459.94 : -50.69 = -1.124% (+/-0.00%) bm_pidigits.py 650.28 -> 644.96 : -5.32 = -0.818% (+/-0.23%) core_import_mpy_multi.py 564.77 -> 581.49 : +16.72 = +2.960% (+/-0.01%) core_import_mpy_single.py 68.67 -> 67.16 : -1.51 = -2.199% (+/-0.01%) core_qstr.py 64.16 -> 64.12 : -0.04 = -0.062% (+/-0.00%) core_yield_from.py 362.58 -> 354.50 : -8.08 = -2.228% (+/-0.00%) misc_aes.py 429.69 -> 405.59 : -24.10 = -5.609% (+/-0.01%) misc_mandel.py 3485.13 -> 3416.51 : -68.62 = -1.969% (+/-0.00%) misc_pystone.py 2496.53 -> 2405.56 : -90.97 = -3.644% (+/-0.01%) misc_raytrace.py 381.47 -> 374.01 : -7.46 = -1.956% (+/-0.01%) viper_call0.py 576.73 -> 572.49 : -4.24 = -0.735% (+/-0.04%) viper_call1a.py 550.37 -> 546.21 : -4.16 = -0.756% (+/-0.09%) viper_call1b.py 438.23 -> 435.68 : -2.55 = -0.582% (+/-0.06%) viper_call1c.py 442.84 -> 440.04 : -2.80 = -0.632% (+/-0.08%) viper_call2a.py 536.31 -> 532.35 : -3.96 = -0.738% (+/-0.06%) viper_call2b.py 382.34 -> 377.07 : -5.27 = -1.378% (+/-0.03%) And for unix on x64: diff of scores (higher is better) N=2000 M=2000 baseline -> this-commit diff diff% (error%) bm_chaos.py 13594.20 -> 13073.84 : -520.36 = -3.828% (+/-5.44%) bm_fannkuch.py 60.63 -> 59.58 : -1.05 = -1.732% (+/-3.01%) bm_fft.py 112009.15 -> 111603.32 : -405.83 = -0.362% (+/-4.03%) bm_float.py 246202.55 -> 247923.81 : +1721.26 = +0.699% (+/-2.79%) bm_hexiom.py 615.65 -> 617.21 : +1.56 = +0.253% (+/-1.64%) bm_nqueens.py 215807.95 -> 215600.96 : -206.99 = -0.096% (+/-3.52%) bm_pidigits.py 8246.74 -> 8422.82 : +176.08 = +2.135% (+/-3.64%) misc_aes.py 16133.00 -> 16452.74 : +319.74 = +1.982% (+/-1.50%) misc_mandel.py 128146.69 -> 130796.43 : +2649.74 = +2.068% (+/-3.18%) misc_pystone.py 83811.49 -> 83124.85 : -686.64 = -0.819% (+/-1.03%) misc_raytrace.py 21688.02 -> 21385.10 : -302.92 = -1.397% (+/-3.20%) The code size change is (firmware with a lot of frozen code benefits the most): bare-arm: +396 +0.697% minimal x86: +1595 +0.979% [incl +32(data)] unix x64: +2408 +0.470% [incl +800(data)] unix nanbox: +1396 +0.309% [incl -96(data)] stm32: -1256 -0.318% PYBV10 cc3200: +288 +0.157% esp8266: -260 -0.037% GENERIC esp32: -216 -0.014% GENERIC[incl -1072(data)] nrf: +116 +0.067% pca10040 rp2: -664 -0.135% PICO samd: +844 +0.607% ADAFRUIT_ITSYBITSY_M4_EXPRESS As part of this change the .mpy file format version is bumped to version 6. And mpy-tool.py has been improved to provide a good visualisation of the contents of .mpy files. In summary: this commit changes the bytecode to use qstr indirection, and reworks the .mpy file format to be simpler and allow .mpy files to be executed in-place. Performance is not impacted too much. Eventually it will be possible to store such .mpy files in a linear, read-only, memory- mappable filesystem so they can be executed from flash/ROM. This will essentially be able to replace frozen code for most applications. Signed-off-by: Damien George <damien@micropython.org>
2021-10-22 13:22:47 +02:00
const mp_frozen_module_t *frozen = modref;
module_obj->constants = frozen->constants;
#if MICROPY_PY___FILE__
qstr frozen_file_qstr = qstr_from_str(file_str + frozen_path_prefix_len);
#else
qstr frozen_file_qstr = MP_QSTRnull;
#endif
do_execute_raw_code(module_obj, frozen->rc, frozen_file_qstr);
return;
}
#endif
}
#endif // MICROPY_MODULE_FROZEN
qstr file_qstr = qstr_from_str(file_str);
// If we support loading .mpy files then check if the file extension is of
// the correct format and, if so, load and execute the file.
#if MICROPY_HAS_FILE_READER && MICROPY_PERSISTENT_CODE_LOAD
if (file_str[file->len - 3] == 'm') {
mp_compiled_module_t cm;
cm.context = module_obj;
mp_raw_code_load_file(file_qstr, &cm);
do_execute_raw_code(cm.context, cm.rc, file_qstr);
return;
}
#endif
// If we can compile scripts then load the file and compile and execute it.
#if MICROPY_ENABLE_COMPILER
{
mp_lexer_t *lex = mp_lexer_new_from_file(file_qstr);
do_load_from_lexer(module_obj, lex);
return;
}
#else
// If we get here then the file was not frozen and we can't compile scripts.
mp_raise_msg(&mp_type_ImportError, MP_ERROR_TEXT("script compilation not supported"));
#endif
}
// Convert a relative (to the current module) import, going up "level" levels,
// into an absolute import.
STATIC void evaluate_relative_import(mp_int_t level, const char **module_name, size_t *module_name_len) {
// What we want to do here is to take the name of the current module,
// remove <level> trailing components, and concatenate the passed-in
// module name.
// For example, level=3, module_name="foo.bar", __name__="a.b.c.d" --> "a.foo.bar"
// "Relative imports use a module's __name__ attribute to determine that
// module's position in the package hierarchy."
// http://legacy.python.org/dev/peps/pep-0328/#relative-imports-and-name
mp_obj_t current_module_name_obj = mp_obj_dict_get(MP_OBJ_FROM_PTR(mp_globals_get()), MP_OBJ_NEW_QSTR(MP_QSTR___name__));
assert(current_module_name_obj != MP_OBJ_NULL);
#if MICROPY_MODULE_OVERRIDE_MAIN_IMPORT && MICROPY_CPYTHON_COMPAT
if (MP_OBJ_QSTR_VALUE(current_module_name_obj) == MP_QSTR___main__) {
// This is a module loaded by -m command-line switch (e.g. unix port),
// and so its __name__ has been set to "__main__". Get its real name
// that we stored during import in the __main__ attribute.
current_module_name_obj = mp_obj_dict_get(MP_OBJ_FROM_PTR(mp_globals_get()), MP_OBJ_NEW_QSTR(MP_QSTR___main__));
}
#endif
// If we have a __path__ in the globals dict, then we're a package.
bool is_pkg = mp_map_lookup(&mp_globals_get()->map, MP_OBJ_NEW_QSTR(MP_QSTR___path__), MP_MAP_LOOKUP);
#if DEBUG_PRINT
DEBUG_printf("Current module/package: ");
mp_obj_print_helper(MICROPY_DEBUG_PRINTER, current_module_name_obj, PRINT_REPR);
DEBUG_printf(", is_package: %d", is_pkg);
DEBUG_printf("\n");
#endif
size_t current_module_name_len;
const char *current_module_name = mp_obj_str_get_data(current_module_name_obj, &current_module_name_len);
const char *p = current_module_name + current_module_name_len;
if (is_pkg) {
// If we're evaluating relative to a package, then take off one fewer
// level (i.e. the relative search starts inside the package, rather
// than as a sibling of the package).
--level;
}
// Walk back 'level' dots (or run out of path).
while (level && p > current_module_name) {
if (*--p == '.') {
--level;
}
}
// We must have some component left over to import from.
if (p == current_module_name) {
mp_raise_msg(&mp_type_ImportError, MP_ERROR_TEXT("can't perform relative import"));
}
// New length is len("<chopped path>.<module_name>"). Note: might be one byte
// more than we need if module_name is empty (for the extra . we will
// append).
uint new_module_name_len = (size_t)(p - current_module_name) + 1 + *module_name_len;
char *new_mod = mp_local_alloc(new_module_name_len);
memcpy(new_mod, current_module_name, p - current_module_name);
// Only append ".<module_name>" if there was one).
if (*module_name_len != 0) {
new_mod[p - current_module_name] = '.';
memcpy(new_mod + (p - current_module_name) + 1, *module_name, *module_name_len);
} else {
--new_module_name_len;
}
// Copy into a QSTR.
qstr new_mod_q = qstr_from_strn(new_mod, new_module_name_len);
mp_local_free(new_mod);
DEBUG_printf("Resolved base name for relative import: '%s'\n", qstr_str(new_mod_q));
*module_name = qstr_str(new_mod_q);
*module_name_len = new_module_name_len;
}
typedef struct _nlr_jump_callback_node_unregister_module_t {
nlr_jump_callback_node_t callback;
qstr name;
} nlr_jump_callback_node_unregister_module_t;
STATIC void unregister_module_from_nlr_jump_callback(void *ctx_in) {
nlr_jump_callback_node_unregister_module_t *ctx = ctx_in;
mp_map_t *mp_loaded_modules_map = &MP_STATE_VM(mp_loaded_modules_dict).map;
mp_map_lookup(mp_loaded_modules_map, MP_OBJ_NEW_QSTR(ctx->name), MP_MAP_LOOKUP_REMOVE_IF_FOUND);
}
// Load a module at the specified absolute path, possibly as a submodule of the given outer module.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// full_mod_name: The full absolute path up to this level (e.g. "foo.bar.baz").
// level_mod_name: The final component of the path (e.g. "baz").
// outer_module_obj: The parent module (we need to store this module as an
// attribute on it) (or MP_OBJ_NULL for top-level).
// override_main: Whether to set the __name__ to "__main__" (and use __main__
// for the actual path).
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
STATIC mp_obj_t process_import_at_level(qstr full_mod_name, qstr level_mod_name, mp_obj_t outer_module_obj, bool override_main) {
// Immediately return if the module at this level is already loaded.
mp_map_elem_t *elem;
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
#if MICROPY_PY_SYS
// If sys.path is empty, the intention is to force using a built-in. This
// means we should also ignore any loaded modules with the same name
// which may have come from the filesystem.
size_t path_num;
mp_obj_t *path_items;
mp_obj_get_array(mp_sys_path, &path_num, &path_items);
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
if (path_num)
#endif
{
elem = mp_map_lookup(&MP_STATE_VM(mp_loaded_modules_dict).map, MP_OBJ_NEW_QSTR(full_mod_name), MP_MAP_LOOKUP);
if (elem) {
return elem->value;
}
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
VSTR_FIXED(path, MICROPY_ALLOC_PATH_MAX);
mp_import_stat_t stat = MP_IMPORT_STAT_NO_EXIST;
mp_obj_t module_obj;
if (outer_module_obj == MP_OBJ_NULL) {
// First module in the dotted-name path.
DEBUG_printf("Searching for top-level module\n");
// An import of a non-extensible built-in will always bypass the
// filesystem. e.g. `import micropython` or `import pyb`. So try and
// match a non-extensible built-ins first.
module_obj = mp_module_get_builtin(level_mod_name, false);
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
if (module_obj != MP_OBJ_NULL) {
return module_obj;
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// If relative imports are enabled, try a relative filesystem import
// *before* an absolute (sys.paht) filesystem import.
#if MICROPY_RELATIVE_FILE_IMPORTS
size_t is_len;
mp_obj_t *is_items;
mp_obj_list_get(MP_STATE_VM(mp_import_stack), &is_len, &is_items);
if(is_len > 0) {
// Look next to last import
char const *prev = qstr_str(MP_OBJ_QSTR_VALUE(is_items[is_len - 1]));
char const *slash = strrchr(prev, '/');
int prev_len = (slash ? slash : prev) - prev;
DEBUG_printf("last import was '%s'\n", prev);
vstr_add_strn(&path, prev, prev_len);
vstr_add_char(&path, PATH_SEP_CHAR[0]);
vstr_add_str(&path, qstr_str(level_mod_name));
DEBUG_printf("checking for relative imoprt at '%.*s'\n",
(int)vstr_len(&path), vstr_str(&path));
stat = stat_module(&path);
}
#endif
// Next try the filesystem. Search for a directory or file relative to
// all the locations in sys.path.
if(stat == MP_IMPORT_STAT_NO_EXIST) {
stat = stat_top_level(level_mod_name, &path);
}
// If filesystem failed, now try and see if it matches an extensible
// built-in module.
if (stat == MP_IMPORT_STAT_NO_EXIST) {
module_obj = mp_module_get_builtin(level_mod_name, true);
if (module_obj != MP_OBJ_NULL) {
return module_obj;
}
}
} else {
DEBUG_printf("Searching for sub-module\n");
#if MICROPY_MODULE_BUILTIN_SUBPACKAGES
// If the outer module is a built-in (because its map is in ROM), then
// treat it like a package if it contains this submodule in its
// globals dict.
mp_obj_module_t *mod = MP_OBJ_TO_PTR(outer_module_obj);
if (mod->globals->map.is_fixed) {
elem = mp_map_lookup(&mod->globals->map, MP_OBJ_NEW_QSTR(level_mod_name), MP_MAP_LOOKUP);
// Also verify that the entry in the globals dict is in fact a module.
if (elem && mp_obj_is_type(elem->value, &mp_type_module)) {
return elem->value;
}
}
#endif
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// If the outer module is a package, it will have __path__ set.
// We can use that as the path to search inside.
mp_obj_t dest[2];
mp_load_method_maybe(outer_module_obj, MP_QSTR___path__, dest);
if (dest[0] != MP_OBJ_NULL) {
// e.g. __path__ will be "<matched search path>/foo/bar"
vstr_add_str(&path, mp_obj_str_get_str(dest[0]));
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Add the level module name to the path to get "<matched search path>/foo/bar/baz".
vstr_add_char(&path, PATH_SEP_CHAR[0]);
vstr_add_str(&path, qstr_str(level_mod_name));
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
stat = stat_module(&path);
}
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Not already loaded, and not a built-in, so look at the stat result from the filesystem/frozen.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
if (stat == MP_IMPORT_STAT_NO_EXIST) {
// Not found -- fail.
#if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE
mp_raise_msg(&mp_type_ImportError, MP_ERROR_TEXT("module not found"));
#else
mp_raise_msg_varg(&mp_type_ImportError, MP_ERROR_TEXT("no module named '%q'"), full_mod_name);
#endif
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Module was found on the filesystem/frozen, try and load it.
DEBUG_printf("Found path to load: %.*s\n", (int)vstr_len(&path), vstr_str(&path));
// Prepare for loading from the filesystem. Create a new shell module
// and register it in sys.modules. Also make sure we remove it if
// there is any problem below.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
module_obj = mp_obj_new_module(full_mod_name);
nlr_jump_callback_node_unregister_module_t ctx;
ctx.name = full_mod_name;
nlr_push_jump_callback(&ctx.callback, unregister_module_from_nlr_jump_callback);
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
#if MICROPY_MODULE_OVERRIDE_MAIN_IMPORT
// If this module is being loaded via -m on unix, then
// override __name__ to "__main__". Do this only for *modules*
// however - packages never have their names replaced, instead
// they're -m'ed using a special __main__ submodule in them. (This all
// apparently is done to not touch the package name itself, which is
// important for future imports).
if (override_main && stat != MP_IMPORT_STAT_DIR) {
mp_obj_module_t *o = MP_OBJ_TO_PTR(module_obj);
mp_obj_dict_store(MP_OBJ_FROM_PTR(o->globals), MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR___main__));
#if MICROPY_CPYTHON_COMPAT
// Store module as "__main__" in the dictionary of loaded modules (returned by sys.modules).
mp_obj_dict_store(MP_OBJ_FROM_PTR(&MP_STATE_VM(mp_loaded_modules_dict)), MP_OBJ_NEW_QSTR(MP_QSTR___main__), module_obj);
// Store real name in "__main__" attribute. Need this for
// resolving relative imports later. "__main__ was chosen
// semi-randonly, to reuse existing qstr's.
mp_obj_dict_store(MP_OBJ_FROM_PTR(o->globals), MP_OBJ_NEW_QSTR(MP_QSTR___main__), MP_OBJ_NEW_QSTR(full_mod_name));
#endif
}
#endif // MICROPY_MODULE_OVERRIDE_MAIN_IMPORT
if (stat == MP_IMPORT_STAT_DIR) {
// Directory (i.e. a package).
DEBUG_printf("%.*s is dir\n", (int)vstr_len(&path), vstr_str(&path));
// Store the __path__ attribute onto this module.
// https://docs.python.org/3/reference/import.html
// "Specifically, any module that contains a __path__ attribute is considered a package."
// This gets used later to locate any subpackages of this module.
mp_store_attr(module_obj, MP_QSTR___path__, mp_obj_new_str(vstr_str(&path), vstr_len(&path)));
size_t orig_path_len = path.len;
vstr_add_str(&path, PATH_SEP_CHAR "__init__.py");
// execute "path/__init__.py" (if available).
if (stat_file_py_or_mpy(&path) == MP_IMPORT_STAT_FILE) {
do_load(MP_OBJ_TO_PTR(module_obj), &path);
} else {
// No-op. Nothing to load.
// mp_warning("%s is imported as namespace package", vstr_str(&path));
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Remove /__init__.py suffix from path.
path.len = orig_path_len;
} else { // MP_IMPORT_STAT_FILE
// File -- execute "path.(m)py".
do_load(MP_OBJ_TO_PTR(module_obj), &path);
// Note: This should be the last component in the import path. If
// there are remaining components then in the next call to
// process_import_at_level will detect that it doesn't have
// a __path__ attribute, and not attempt to stat it.
}
if (outer_module_obj != MP_OBJ_NULL) {
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// If it's a sub-module then make it available on the parent module.
mp_store_attr(outer_module_obj, level_mod_name, module_obj);
}
nlr_pop_jump_callback(false);
return module_obj;
}
mp_obj_t mp_builtin___import___default(size_t n_args, const mp_obj_t *args) {
#if DEBUG_PRINT
DEBUG_printf("__import__:\n");
for (size_t i = 0; i < n_args; i++) {
DEBUG_printf(" ");
mp_obj_print_helper(MICROPY_DEBUG_PRINTER, args[i], PRINT_REPR);
DEBUG_printf("\n");
}
#endif
// This is the import path, with any leading dots stripped.
// "import foo.bar" --> module_name="foo.bar"
// "from foo.bar import baz" --> module_name="foo.bar"
// "from . import foo" --> module_name=""
// "from ...foo.bar import baz" --> module_name="foo.bar"
mp_obj_t module_name_obj = args[0];
// These are the imported names.
// i.e. "from foo.bar import baz, zap" --> fromtuple=("baz", "zap",)
// Note: There's a special case on the Unix port, where this is set to mp_const_false which means that it's __main__.
mp_obj_t fromtuple = mp_const_none;
// Level is the number of leading dots in a relative import.
// i.e. "from . import foo" --> level=1
// i.e. "from ...foo.bar import baz" --> level=3
mp_int_t level = 0;
if (n_args >= 4) {
fromtuple = args[3];
if (n_args >= 5) {
level = MP_OBJ_SMALL_INT_VALUE(args[4]);
if (level < 0) {
mp_raise_ValueError(NULL);
}
}
}
size_t module_name_len;
const char *module_name = mp_obj_str_get_data(module_name_obj, &module_name_len);
if (level != 0) {
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Turn "foo.bar" with level=3 into "<current module 3 components>.foo.bar".
// Current module name is extracted from globals().__name__.
evaluate_relative_import(level, &module_name, &module_name_len);
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// module_name is now an absolute module path.
}
if (module_name_len == 0) {
mp_raise_ValueError(NULL);
}
DEBUG_printf("Starting module search for '%s'\n", module_name);
mp_obj_t top_module_obj = MP_OBJ_NULL;
mp_obj_t outer_module_obj = MP_OBJ_NULL;
// Search for the end of each component.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Iterate the absolute path, finding the end of each component of the path.
// foo.bar.baz
// ^ ^ ^
size_t current_component_start = 0;
for (size_t i = 1; i <= module_name_len; i++) {
if (i == module_name_len || module_name[i] == '.') {
// The module name up to this depth (e.g. foo.bar.baz).
qstr full_mod_name = qstr_from_strn(module_name, i);
// The current level name (e.g. baz).
qstr level_mod_name = qstr_from_strn(module_name + current_component_start, i - current_component_start);
DEBUG_printf("Processing module: '%s' at level '%s'\n", qstr_str(full_mod_name), qstr_str(level_mod_name));
#if MICROPY_MODULE_OVERRIDE_MAIN_IMPORT
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// On unix, if this is being loaded via -m (indicated by sentinel
// fromtuple=mp_const_false), then handle that if it's the final
// component.
bool override_main = (i == module_name_len && fromtuple == mp_const_false);
#else
bool override_main = false;
#endif
// Import this module.
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
mp_obj_t module_obj = process_import_at_level(full_mod_name, level_mod_name, outer_module_obj, override_main);
// Set this as the parent module, and remember the top-level module if it's the first.
outer_module_obj = module_obj;
if (top_module_obj == MP_OBJ_NULL) {
top_module_obj = module_obj;
}
current_component_start = i + 1;
}
}
if (fromtuple != mp_const_none) {
// If fromtuple is not empty, return leaf module
return outer_module_obj;
} else {
// Otherwise, we need to return top-level package
return top_module_obj;
}
}
#else // MICROPY_ENABLE_EXTERNAL_IMPORT
mp_obj_t mp_builtin___import___default(size_t n_args, const mp_obj_t *args) {
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Check that it's not a relative import.
if (n_args >= 5 && MP_OBJ_SMALL_INT_VALUE(args[4]) != 0) {
mp_raise_NotImplementedError(MP_ERROR_TEXT("relative import"));
}
py/builtinimport: Optimise sub-package loading. This makes it so that sub-packages are resolved relative to their parent's `__path__`, rather than re-resolving each parent's filesystem path. The previous behavior was that `import foo.bar` would first re-search `sys.path` for `foo`, then use the resulting path to find `bar`. For already-loaded and u-prefixed modules, because we no longer need to build the path from level to level, we no longer unnecessarily search the filesystem. This should improve startup time. Explicitly makes the resolving process clear: - Loaded modules are returned immediately without touching the filesystem. - Exact-match of builtins are also returned immediately. - Then the filesystem search happens. - If that fails, then the weak-link handling is applied. This maintains the existing behavior: if a user writes `import time` they will get time.py if it exits, otherwise the built-in utime. Whereas `import utime` will always return the built-in. This also fixes a regression from a7fa18c203a241f670f12ab507aa8b349fcd45a1 where we search the filesystem for built-ins. It is now only possible to override u-prefixed builtins. This will remove a lot of filesystem stats at startup, as micropython-specific modules (e.g. `pyb`) will no longer attempt to look at the filesystem. Added several improvements to the comments and some minor renaming and refactoring to make it clearer how the import mechanism works. Overall code size diff is +56 bytes on STM32. This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
2023-05-10 05:02:09 +02:00
// Check if the module is already loaded.
mp_map_elem_t *elem = mp_map_lookup(&MP_STATE_VM(mp_loaded_modules_dict).map, args[0], MP_MAP_LOOKUP);
if (elem) {
return elem->value;
}
// Try the name directly as a non-extensible built-in (e.g. `micropython`).
qstr module_name_qstr = mp_obj_str_get_qstr(args[0]);
mp_obj_t module_obj = mp_module_get_builtin(module_name_qstr, false);
if (module_obj != MP_OBJ_NULL) {
return module_obj;
}
// Now try as an extensible built-in (e.g. `time`).
module_obj = mp_module_get_builtin(module_name_qstr, true);
if (module_obj != MP_OBJ_NULL) {
return module_obj;
}
// Couldn't find the module, so fail
#if MICROPY_ERROR_REPORTING <= MICROPY_ERROR_REPORTING_TERSE
mp_raise_msg(&mp_type_ImportError, MP_ERROR_TEXT("module not found"));
#else
mp_raise_msg_varg(&mp_type_ImportError, MP_ERROR_TEXT("no module named '%q'"), module_name_qstr);
#endif
}
#endif // MICROPY_ENABLE_EXTERNAL_IMPORT
2014-02-03 23:46:17 +01:00
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin___import___obj, 1, 5, mp_builtin___import__);