commit 13b74bee1f22b9501d6ee65b2c0732f0ed876d2f
parent 7877e3831f8dbbe7dd791d400c23eb0afbeb7c4e
Author: nolash <dev@holbrook.no>
Date: Tue, 14 Sep 2021 22:55:56 +0200
Add chunk callback/emit
Diffstat:
2 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/pylibswarm/runnable/file.py b/pylibswarm/runnable/file.py
@@ -11,23 +11,25 @@ argparser = argparse.ArgumentParser()
argparser.add_argument('-n', action='store_true', help='skip newline at end of output')
argparser.add_argument('-b', action='store_true', help='output raw bytes')
argparser.add_argument('-v', action='store_true', help='verbose output')
+argparser.add_argument('-o', type=str, help='chunk output directory')
argparser.add_argument('file', nargs='?', type=str, help='file to hash')
largs = argparser.parse_args(sys.argv[1:])
-#def stdin_arg():
-# """Retreive input arguments from stdin if they exist.
-#
-# Method does not block, and expects arguments to be ready on stdin before being called.
-#
-# :rtype: str
-# :returns: Input arguments string
-# """
-# h = select.select([sys.stdin.buffer], [], [])
-# if len(h[0]) > 0:
-# v = h[0][0].read()
-# return
-# return None
+class Outputter:
+    """Writes emitted chunks to individual files in a directory.
+
+    :param outdir: Directory the chunk files are written to
+    :type outdir: str
+    """
+
+    def __init__(self, outdir):
+        self.outdir = outdir
+        logg.info('outputter set to {}'.format(self.outdir))
+
+
+    def dump(self, hsh, data):
+        """Store a single chunk in a file named by the hex representation of its hash.
+
+        An existing file with the same name is overwritten.
+
+        :param hsh: Chunk hash
+        :type hsh: bytes
+        :param data: Chunk contents
+        :type data: bytes
+        """
+        hsh_hex = hsh.hex()
+        fp = os.path.join(self.outdir, hsh_hex)
+        # The context manager closes the file even if the write fails.
+        with open(fp, 'wb') as f:
+            f.write(data)
+        logg.debug('wrote {} chunk bytes for hash {}'.format(len(data), hsh_hex))
if largs.v:
@@ -35,11 +37,18 @@ if largs.v:
filepath = os.path.realpath(largs.file)
+outputter = None
+if largs.o:
+ outputter = Outputter(largs.o)
def main():
-
import swarm
- r = swarm.filehash_path(filepath)
+
+ if outputter:
+ r = swarm.filehash_path(filepath, outputter.dump)
+ else:
+ r = swarm.filehash_path(filepath)
+
if largs.b:
sys.stdout.buffer.write(r[:32])
else:
diff --git a/src/python_swarm.c b/src/python_swarm.c
@@ -3,10 +3,19 @@
#include <stdio.h>
#include <fcntl.h>
#include <sys/stat.h>
+#include <unistd.h>
#include "bmt.h"
#include "swarmfile.h"
+
+/**
+ * Relay one chunk to the Python callable that was registered together with
+ * this function through filehash_init_callback.
+ *
+ * The callable is invoked as callback(hash, data) with two bytes objects.
+ * Its return value is discarded.
+ *
+ * \param hash Chunk hash, _SWARM_WORD_SIZE bytes are read from it.
+ * \param data Chunk contents.
+ * \param data_length Number of bytes in data.
+ * \param callback_static The Python callable, passed through as an opaque pointer.
+ */
+static void filehash_callback(const char *hash, const char *data, size_t data_length, void *callback_static) {
+	PyObject *callback = (PyObject*)callback_static;
+	PyObject *hash_bytes;
+	PyObject *data_bytes = NULL;
+	PyObject *result = NULL;
+
+	/*
+	 * Build the arguments explicitly instead of using a "y#y#" format:
+	 * the width of a '#' length read from varargs depends on whether
+	 * PY_SSIZE_T_CLEAN is defined, and a size_t need not match it.
+	 */
+	hash_bytes = PyBytes_FromStringAndSize(hash, _SWARM_WORD_SIZE);
+	if (hash_bytes != NULL) {
+		data_bytes = PyBytes_FromStringAndSize(data, (Py_ssize_t)data_length);
+	}
+	if (data_bytes != NULL) {
+		result = PyObject_CallFunctionObjArgs(callback, hash_bytes, data_bytes, NULL);
+	}
+
+	if (result == NULL) {
+		/*
+		 * The void return type gives no way to hand the exception back
+		 * to the caller, and leaving it pending would poison later calls
+		 * into the interpreter. Report it and clear it.
+		 */
+		PyErr_WriteUnraisable(callback);
+	}
+
+	/* The call returns a new reference; drop it together with the arguments. */
+	Py_XDECREF(result);
+	Py_XDECREF(data_bytes);
+	Py_XDECREF(hash_bytes);
+}
+
+
static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) {
int fd;
int r;
@@ -19,6 +28,7 @@ static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) {
if (fd == -1) {
return -1;
}
+
r = fstat(fd, &st);
if (r == -1) {
return -1;
@@ -39,16 +49,17 @@ static bmt_spansize_t filehash_path(filehash_t *fctx, const char *filepath) {
c += l;
filehash_write(fctx, buf, l);
}
+
+ close(fd);
+
if (st.st_size != c) {
- close(fd);
return -1;
}
- close(fd);
-
return filehash_sum(fctx);
}
+
static PyObject* method_bmt(PyObject *self, PyObject *args) {
bmt_t bctx;
const char *input;
@@ -60,23 +71,31 @@ static PyObject* method_bmt(PyObject *self, PyObject *args) {
if (r != 1) {
return NULL;
}
+
bmt_init(&bctx, (char*)input, input_length, data_length);
bmt_sum(&bctx);
+
return Py_BuildValue("y#", &bctx.buf, _SWARM_WORD_SIZE);
}
+
+/**
+ * Python binding: filehash_path(path[, callback]) -> bytes
+ *
+ * Hashes the file at the given path and returns _SWARM_WORD_SIZE bytes of
+ * the resulting digest. If a callback is supplied (and is not None) it is
+ * registered with the hasher and receives chunks as (hash, data) bytes
+ * pairs through filehash_callback.
+ *
+ * Returns NULL with a Python exception set if the arguments cannot be
+ * parsed, the callback is not callable, or the file could not be hashed.
+ */
static PyObject* method_filehash_path(PyObject *self, PyObject *args) {
	filehash_t fctx;
-	const char *filepath;
+	const char *inpath;
+	/* "|O" leaves the variable untouched when the argument is omitted, so it must start out as NULL. */
+	PyObject *client_callback = NULL;
+	bmt_spansize_t span;
	int r;
-	r = PyArg_ParseTuple(args, "s", &filepath);
+	r = PyArg_ParseTuple(args, "s|O", &inpath, &client_callback);
	if (r != 1) {
		return NULL;
	}
-	filehash_init(&fctx);
-	r = filehash_path(&fctx, filepath);
+	/* Treat an explicit None the same as an omitted callback. */
+	if (client_callback == Py_None) {
+		client_callback = NULL;
+	}
+	if (client_callback != NULL && !PyCallable_Check(client_callback)) {
+		PyErr_SetString(PyExc_TypeError, "callback must be callable");
+		return NULL;
+	}
+	if (client_callback == NULL) {
+		filehash_init(&fctx);
+	} else {
+		filehash_init_callback(&fctx, filehash_callback, client_callback);
+	}
+	/*
+	 * filehash_path returns -1 when the file cannot be opened, stat'ed or
+	 * read completely. Compare in the return type so the check holds whether
+	 * bmt_spansize_t is signed or unsigned.
+	 * NOTE(review): assumes filehash_sum never yields this value on success - confirm.
+	 */
+	span = filehash_path(&fctx, inpath);
+	if (span == (bmt_spansize_t)-1) {
+		PyErr_Format(PyExc_OSError, "could not read and hash file '%s'", inpath);
+		return NULL;
+	}
	return Py_BuildValue("y#", &fctx.buf, _SWARM_WORD_SIZE);
}
@@ -84,7 +103,7 @@ static PyObject* method_filehash_path(PyObject *self, PyObject *args) {
static PyMethodDef SwarmMethods[] = {
{"bmt", method_bmt, METH_VARARGS, "Calculate the BMT hash of the given data"},
- {"filehash_path", method_filehash_path, METH_VARARGS, "Calculate the Swarm file hash of the data from the given file path"},
+ {"filehash_path", method_filehash_path, METH_VARARGS, "Calculate the Swarm file hash of the data from the given file path, with optional callback to receive chunks"}, /* the callback is the optional second positional argument and is called as callback(hash, data) with two bytes objects */
{NULL, NULL, 0, NULL},
};