manbytesgnu_site

Source files for manbytesgnu.org
git clone git://holbrook.no/manbytesgnu_site.git
Log | Files | Refs

batch.sh (1445B)


      1 # NOTE! this will only work if your fs supports xattr.
      2 # That's why we cannot use tmpfs (mktemp) here; tmpfs does not support xattr. 
      3 
      4 # directory to copy media files to
      5 outdir=./$(uuidgen)
      6 mkdir -vp $outdir
      7 
      8 # Input dir is the first positional arg.
      9 indir=$1
     10 
     11 IFS=$'\n'
     12 
     13 # Retrieve metadata for each file and import it into the kitab store.
     14 # Also copy the media file to the separate output directory.
     15 for f in $(find $indir -type f); do
     16 	sum=$(md5sum $f | awk '{print $1;}')
     17 	echo "downloading metadata for $indir/$f"
     18 	srct=$(mktemp)
     19 	curl -s -X GET https://libgen.rs/book/bibtex.php?md5=$sum -o $srct
     20 	dstt=$(mktemp)
     21 	xmllint --html --xpath 'string(/html/body/textarea[@id="bibtext"])' $srct  > $dstt
     22 	kitab import --digest md5:$sum $dstt
     23 	cp $f $outdir/
     24 done
     25 
     26 # Apply metadata imported from bibtex as xattr for the media files.
     27 RUST_LOG=info kitab apply --digest md5 $outdir/
     28 
     29 # Rename the files according to the metadata title and media type.
     30 for f in $(ls $outdir); do
     31 	title=$(getfattr --only-values -n user.dcterms:title $outdir/$f)
     32 
     33 	f_typ=$(file -b --mime-type $outdir/$f)
     34 	f_ext=""
     35 	case "$f_typ" in
     36 		"application/pdf")
     37 			f_ext=".pdf"
     38 			;;
     39 		"application/epub+zip")
     40 			f_ext=".epub"
     41 			;;
     42 		"application/x-mobipocket-ebook")
     43 			f_ext=".mobi"
     44 			;;
     45 		"text/plain")
     46 			f_ext=".txt"
     47 			;;
     48 		"text/html")
     49 			f_ext=".html"
     50 			;;
     51 		*)
     52 			>&2 echo unhandled mime type $f_typ
     53 			exit 1
     54 	esac
     55 	mv -v $outdir/$f $outdir/${title}${f_ext}
     56 done