From ee945305828750c4bf6e1ee010444d705eb9334b Mon Sep 17 00:00:00 2001 From: Andy Tai Date: Sat, 20 Feb 2021 22:58:07 -0800 Subject: [PATCH] gnu: Add ocrfeeder * gnu/packages/gnome.scm (ocrfeeder): New variable --- gnu/packages/gnome.scm | 67 ++++++++++++++++++++++++++++++++++++- gnu/packages/ocr.scm | 32 +++++++++--------- gnu/packages/python-xyz.scm | 24 +++++++++++++ 3 files changed, 106 insertions(+), 17 deletions(-) diff --git a/gnu/packages/gnome.scm b/gnu/packages/gnome.scm index 0b46e2719b..b1defb7208 100644 --- a/gnu/packages/gnome.scm +++ b/gnu/packages/gnome.scm @@ -56,7 +56,7 @@ ;;; Copyright © 2020 Arun Isaac ;;; Copyright © 2020 Michael Rohleder ;;; Copyright © 2020 Tim Gesthuizen -;;; Copyright © 2020 Andy Tai +;;; Copyright © 2020, 2021 Andy Tai ;;; ;;; This file is part of GNU Guix. ;;; @@ -149,6 +149,7 @@ #:use-module (gnu packages ninja) #:use-module (gnu packages node) #:use-module (gnu packages nss) + #:use-module (gnu packages ocr) #:use-module (gnu packages openldap) #:use-module (gnu packages package-management) #:use-module (gnu packages password-utils) @@ -12095,3 +12096,67 @@ for the GNOME 3.x platform with many features. 
It aims to be a very complete editing environment for translation issues within the GNU gettext/GNOME desktop world.") (license license:gpl3+))) + + +(define-public ocrfeeder + (package + (name "ocrfeeder") + (version "0.8.3") + (source (origin + (method url-fetch) + (uri (string-append "mirror://gnome/sources/ocrfeeder/" + (version-major+minor version) "/" + "ocrfeeder-" version ".tar.xz")) + (sha256 + (base32 + "12f5gnq92ffnd5zaj04df7jrnsdz1zn4zcgpbf5p9qnd21i2y529")))) + (build-system gnu-build-system) + (arguments + `(#:phases + (modify-phases %standard-phases + (add-after + 'install 'wrap-program + (lambda* (#:key outputs #:allow-other-keys) + (let ((prog (string-append (assoc-ref outputs "out") + "/bin/" "ocrfeeder")) + (pylib (string-append (assoc-ref outputs "out") + "/lib/python" + ,(version-major+minor + (package-version python)) + "/site-packages"))) + (wrap-program prog + `("PYTHONPATH" = (,(getenv "PYTHONPATH") ,pylib)) + `("GI_TYPELIB_PATH" = (,(getenv "GI_TYPELIB_PATH")))) + #t)))))) + (native-inputs + `(("glib:bin" ,glib "bin") ; for glib-compile-resources + ("gobject-introspection" ,gobject-introspection) + ("gtk+:bin" ,gtk+ "bin") ; for gtk-update-icon-cache + ("intltool" ,intltool) + ("itstool" ,itstool) + ("pkg-config" ,pkg-config) + ("xmllint" ,libxml2))) + (inputs + `(("enchant" ,enchant) + ("glib" ,glib) + ("goocanvas" ,goocanvas) + ("gtk" ,gtk+) + ("gtkspell3" ,gtkspell3) + ("libjpeg" ,libjpeg-turbo) + ("libtiff" ,libtiff) + ("libraw" ,libraw) + ("ocrad" ,ocrad) + ("python" ,python-wrapper) + ("python-pygobject" ,python-pygobject) + ("python-odfpy" ,python-odfpy) + ("python-pillow" ,python-pillow) + ("python-pyenchant" ,python-pyenchant) + ("python-reportlab" ,python-reportlab) + ("python-sane" ,python-sane) + ("sane-backends" ,sane-backends) + ("tesseract-ocr" ,tesseract-ocr))) + (home-page "https://wiki.gnome.org/Apps/OCRFeeder") + (synopsis "Complete OCR Suite") + (description "OCRFeeder is a complete Optical Character Recognition and 
+Document Analysis and Recognition program.") + (license license:gpl3+))) diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm index c4e965941a..0b46f979ce 100644 --- a/gnu/packages/ocr.scm +++ b/gnu/packages/ocr.scm @@ -67,21 +67,21 @@ it produces text in 8-bit or UTF-8 formats.") ;; There are useful commits beyond the last official stable release. (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df") (revision "1")) - (package - (name "tesseract-ocr") + (package + (name "tesseract-ocr") (version (git-version "4.1.1" revision commit)) - (source - (origin - (method git-fetch) - (uri (git-reference - (url "https://github.com/tesseract-ocr/tesseract") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/tesseract-ocr/tesseract") (commit commit))) (file-name (git-file-name name version)) - (sha256 + (sha256 (base32 "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98")))) - (build-system gnu-build-system) - (inputs + (build-system gnu-build-system) + (inputs `(("cairo" ,cairo) ("icu" ,icu4c) ("leptonica" ,leptonica) @@ -98,9 +98,9 @@ it produces text in 8-bit or UTF-8 formats.") ("libtiff" ,libtiff) ("pkg-config" ,pkg-config) ("xsltproc" ,libxslt))) - (arguments + (arguments `(#:configure-flags - (let ((leptonica (assoc-ref %build-inputs "leptonica"))) + (let ((leptonica (assoc-ref %build-inputs "leptonica"))) (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include"))) #:tests? 
#f ; Tests currently result in a segfault #:phases @@ -120,10 +120,10 @@ it produces text in 8-bit or UTF-8 formats.") (add-after 'build-training 'install-training (lambda _ (invoke "make" "training-install")))))) - (home-page "https://github.com/tesseract-ocr/tesseract") - (synopsis "Optical character recognition engine") - (description - "Tesseract is an optical character recognition (OCR) engine with very + (home-page "https://github.com/tesseract-ocr/tesseract") + (synopsis "Optical character recognition engine") + (description + "Tesseract is an optical character recognition (OCR) engine with very high accuracy. It supports many languages, output text formatting, hOCR positional information and page layout analysis. Several image formats are supported through the Leptonica library. It can also detect whether text is diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm index b26fce72a9..1fb9ff60b4 100644 --- a/gnu/packages/python-xyz.scm +++ b/gnu/packages/python-xyz.scm @@ -180,6 +180,7 @@ #:use-module (gnu packages readline) #:use-module (gnu packages sdl) #:use-module (gnu packages search) + #:use-module (gnu packages scanner) #:use-module (gnu packages shells) #:use-module (gnu packages sphinx) #:use-module (gnu packages ssh) @@ -23906,3 +23907,26 @@ Application Programming Interface based on the Open Inventor 2.1 API.") Crayons automatically wraps a given string in the foreground color and restores the original state after the string is printed.") (license license:expat))) + +(define-public python-sane + (package + (name "python-sane") + (version "2.9.1") + (source + (origin + (method url-fetch) + (uri (pypi-uri name version)) + (sha256 + (base32 + "1pi597z94n2mkd821ln52fq0g727n2jxfskf280ip3kf7jw8w294")))) + (build-system python-build-system) + (native-inputs + `(("python-pytest" ,python-pytest))) + (inputs + `(("sane-backends" ,sane-backends))) + (home-page "https://github.com/python-pillow/Sane") + (synopsis "Python interface to the 
SANE scanner") + (description "This package provides Python interface to the SANE scanner and +frame grabber interface for Linux.") + ;; MIT license; FSF directory refers to this as expat license + (license license:expat))) -- 2.30.0