pylibswarm

Python3 wrapper for libswarm-ng
git clone git://git.defalsify.org/pylibswarm.git
Log | Files | Refs | Submodules | README | LICENSE

commit 13b74bee1f22b9501d6ee65b2c0732f0ed876d2f
parent 7877e3831f8dbbe7dd791d400c23eb0afbeb7c4e
Author: nolash <dev@holbrook.no>
Date:   Tue, 14 Sep 2021 22:55:56 +0200

Add chunk callback/emit

Diffstat:
M pylibswarm/runnable/file.py | 39 ++++++++++++++++++++++++---------------
M src/python_swarm.c | 35 +++++++++++++++++++++++++++--------
2 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/pylibswarm/runnable/file.py b/pylibswarm/runnable/file.py @@ -11,23 +11,25 @@ argparser = argparse.ArgumentParser() argparser.add_argument('-n', action='store_true', help='skip newline at end of output') argparser.add_argument('-b', action='store_true', help='output raw bytes') argparser.add_argument('-v', action='store_true', help='verbose output') +argparser.add_argument('-o', type=str, help='chunk output directory') argparser.add_argument('file', nargs='?', type=str, help='file to hash') largs = argparser.parse_args(sys.argv[1:]) -#def stdin_arg(): -# """Retreive input arguments from stdin if they exist. -# -# Method does not block, and expects arguments to be ready on stdin before being called. -# -# :rtype: str -# :returns: Input arguments string -# """ -# h = select.select([sys.stdin.buffer], [], []) -# if len(h[0]) > 0: -# v = h[0][0].read() -# return -# return None +class Outputter: + + def __init__(self, outdir): + self.outdir = outdir + logg.info('outputter set to {}'.format(self.outdir)) + + + def dump(self, hsh, data): + hsh_hex = hsh.hex() + fp = os.path.join(self.outdir, hsh_hex) + f = open(fp, 'wb') + f.write(data) + f.close() + logg.debug('wrote {} chunk bytes for hash {}'.format(len(data), hsh_hex)) if largs.v: @@ -35,11 +37,18 @@ if largs.v: filepath = os.path.realpath(largs.file) +outputter = None +if largs.o: + outputter = Outputter(largs.o) def main(): - import swarm - r = swarm.filehash_path(filepath) + + if outputter: + r = swarm.filehash_path(filepath, outputter.dump) + else: + r = swarm.filehash_path(filepath) + if largs.b: sys.stdout.buffer.write(r[:32]) else: diff --git a/src/python_swarm.c b/src/python_swarm.c @@ -3,10 +3,19 @@ #include <stdio.h> #include <fcntl.h> #include <sys/stat.h> +#include <unistd.h> #include "bmt.h" #include "swarmfile.h" + +static void filehash_callback(const char *hash, const char *data, size_t data_length, void *callback_static) { + PyObject *callback = (PyObject*)callback_static; + + 
PyObject_CallFunction(callback, "y#y#", hash, _SWARM_WORD_SIZE, data, data_length); +} + + static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) { int fd; int r; @@ -19,6 +28,7 @@ static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) { if (fd == -1) { return -1; } + r = fstat(fd, &st); if (r == -1) { return -1; @@ -39,16 +49,17 @@ static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) { c += l; filehash_write(fctx, buf, l); } + + close(fd); + if (st.st_size != c) { - close(fd); return -1; } - close(fd); - return filehash_sum(fctx); } + static PyObject* method_bmt(PyObject *self, PyObject *args) { bmt_t bctx; const char *input; @@ -60,23 +71,31 @@ static PyObject* method_bmt(PyObject *self, PyObject *args) { if (r != 1) { return NULL; } + bmt_init(&bctx, (char*)input, input_length, data_length); bmt_sum(&bctx); + return Py_BuildValue("y#", &bctx.buf, _SWARM_WORD_SIZE); } + static PyObject* method_filehash_path(PyObject *self, PyObject *args) { filehash_t fctx; - const char *filepath; + const char *inpath; + PyObject *client_callback; int r; - r = PyArg_ParseTuple(args, "s", &filepath); + r = PyArg_ParseTuple(args, "s|O", &inpath, &client_callback); if (r != 1) { return NULL; } - filehash_init(&fctx); - r = filehash_path(&fctx, filepath); + if (client_callback == NULL) { + filehash_init(&fctx); + } else { + filehash_init_callback(&fctx, filehash_callback, client_callback); + } + r = filehash_path(&fctx, inpath); return Py_BuildValue("y#", &fctx.buf, _SWARM_WORD_SIZE); } @@ -84,7 +103,7 @@ static PyObject* method_filehash_path(PyObject *self, PyObject *args) { static PyMethodDef SwarmMethods[] = { {"bmt", method_bmt, METH_VARARGS, "Calculate the BMT hash of the given data"}, - {"filehash_path", method_filehash_path, METH_VARARGS, "Calculate the Swarm file hash of the data from the given file path"}, + {"filehash_path", method_filehash_path, METH_VARARGS, "Calculate the Swarm file hash of the data from 
the given file path, with optional callback to receive chunks"}, {NULL, NULL, 0, NULL}, };