Utente:Chelin/TesseractOCR.js

'A Wikisource.

Notarella: Aroppo pubbreca putisse necessità 'e pulezzà 'a caché d' 'o navigatóre pe vedé 'e cagnamienti.

  • Firefox / Safari: Sprémme 'o buttóne maiuscole e ffà clic ncopp'a Recarreca, o pure spremme Ctrl-F5 o Ctrl-R (⌘-R ncopp'a Mac)
  • Google Chrome: spremme Ctrl-Shift-R (⌘-Shift-R ncopp'a nu Mac)
  • Internet Explorer/edge: Spremme 'o buttóne Ctrl pe' tramente ca faie click ncopp'a Refresh, o pure spremmere Ctrl-F5
  • Opera: Vaje addò 'o Menu → Mpustaziune (Opera → Mpustaziune ncopp' 'o Mac) e po' ncopp'a Privacy & sicurezza → Pulezza date d' 'o browser → Immaggene e file d' 'a cache.
/*jshint boss:true*/
/*global $, mw*/

/**
 * This script adds a toolbar button that replaces the editbox text with OCR text
 * derived by sending the .prp-page-image image through Tesseract.js.
 *
 * For more information, see https://wikisource.org/wiki/Wikisource:Tesseract_OCR
 * Version modified by Ruthven for nap.source changing default language
 */

( function ( mw, $ ) {

	// User-facing messages, keyed by message name. Entries of
	// window.tesseractOcrI18n (when that global is defined before this script
	// runs) are merged over these defaults by $.extend.
	// showProgressMsg looks messages up by the status string it receives, so the
	// first group of keys presumably mirrors Tesseract's status names — confirm
	// against the Tesseract.js version in use.
	// NOTE(review): 'image not found' contains \u2035 directly after \u02BC,
	// which looks like a typo — confirm the intended text.
	var i18n = $.extend( {
		'loading tesseract core': 'Carrecanno Tesseract core',
		'initializing tesseract': 'Abbianno Tesseract',
		'loading language traineddata': 'Carrecanno \u02BCe traineddata',
		'initializing api': 'Abbianno API',
		'recognizing text': 'Recanuscinno \u02BCo tiesto',

		'no text': 'Tesseract nun ha dato nu tiesto',
		'image not found': 'Nun aggio truato na fiùra int\u02BC\u2035a paggena',
		'button label': 'Piglia  \u02BCo tiesto \u02BCa Tesseract OCR',
		'loading indicator': 'Ennece \u02BCe carrecamento',
	}, window.tesseractOcrI18n || {} );

	// Maps a wiki content-language code (the value of wgContentLanguage) to the
	// language identifier(s) requested from Tesseract for that wiki. Several
	// identifiers are joined with '+'.
	// NOTE(review): 'en' maps to 'enm' only — confirm that this is intended.
	var languages = {
		af: 'afr',
		ar: 'ara',
		az: 'aze',
		be: 'bel',
		bg: 'bul',
		bn: 'ben',
		ca: 'cat',
		chr: 'chr',
		cs: 'ces',
		da: 'dan',
		de: 'deu',
		el: 'ell+grc',
		en: 'enm',
		eo: 'epo',
		es: 'spa+spa_old',
		et: 'est',
		eu: 'eus',
		fa: 'fas',
		fi: 'fin',
		fr: 'fra+frm',
		gl: 'glg',
		he: 'heb',
		hi: 'hin',
		hr: 'hrv',
		hu: 'hun',
		id: 'ind',
		is: 'isl',
		it: 'ita+ita_old',
		nap: 'ita',
		ja: 'jpn',
		kn: 'kan',
		ko: 'kor',
		lt: 'lit',
		lv: 'lav',
		mk: 'mkd',
		ml: 'mal',
		ms: 'msa',
		mt: 'mlt',
		nb: 'nor',
		nl: 'nld',
		nn: 'nor',
		pl: 'pol',
		pt: 'por',
		ro: 'ron',
		ru: 'rus',
		sk: 'slk',
		sl: 'slv',
		sq: 'sqi',
		sr: 'srp',
		sv: 'swe',
		sw: 'swa',
		ta: 'tam',
		te: 'tel',
		th: 'tha',
		tl: 'tgl',
		tr: 'tur',
		uk: 'ukr',
		vi: 'vie',
		zh: 'chi_sim+chi_tra',
	};

	// Build the language string handed to Tesseract. 'ita_old' is always
	// requested; when the wiki's content language has an entry in `languages`,
	// that entry is placed in front of it.
	var language = 'ita_old';
	var langCode = mw.config.get( 'wgContentLanguage' );
	if ( Object.prototype.hasOwnProperty.call( languages, langCode ) ) {
		// Some entries (e.g. 'it') already list 'ita_old'; do not request it twice.
		language = languages[ langCode ].split( '+' ).indexOf( language ) === -1 ?
			languages[ langCode ] + '+' + language :
			languages[ langCode ];
	}
	// Spinner image displayed next to the progress message.
	var loadingGifUrl = '//upload.wikimedia.org/wikipedia/commons/4/42/Loading.gif';

	/**
	 * The initialisation function, called once when this script is evaluated.
	 *
	 * After the 'user.options' module is available it checks that the current
	 * page is in the Page namespace and that one of the two toolbar preferences
	 * is enabled. It then loads the matching toolbar module and adds the OCR
	 * button via customizeToolbar. Nothing happens otherwise.
	 */
	function run() {
		mw.loader.using( 'user.options', function () {
			// Preferences may arrive as the number 1 or the string "1" depending
			// on whether the value comes from the site default or was saved by
			// the user, so compare numerically.
			var useOldToolbar = Number( mw.user.options.get( 'showtoolbar' ) ) === 1;
			var useBetaToolbar = Number( mw.user.options.get( 'usebetatoolbar' ) ) === 1;
			var isPage = mw.config.get( 'wgCanonicalNamespace' ) === 'Page';
			var toolbarLib;

			if ( !isPage || !( useOldToolbar || useBetaToolbar ) ) {
				return;
			}

			// The WikiEditor toolbar wins when both preferences are enabled.
			toolbarLib = useBetaToolbar ? 'ext.wikiEditor' : 'mediawiki.toolbar';
			mw.loader.using( [ toolbarLib ], function () {
				customizeToolbar( useBetaToolbar );
			} );
		} );
	}

	/**
	 * Insert the OCR button into the toolbar that is in use. run() calls this
	 * after deciding that a button is wanted, so no eligibility checks are
	 * repeated here.
	 *
	 * @param {boolean} useBeta Whether the WikiEditor toolbar should be used.
	 */
	function customizeToolbar( useBeta ) {
		if ( useBeta ) {
			// WikiEditor toolbar: register the tool once the DOM is ready.
			$( document ).ready( function () {
				$( '#wpTextbox1' ).wikiEditor( 'addToToolbar', {
					section: 'main', // alternative: 'proofreadpage-tools'
					group: 'insert', // alternative: 'other'
					tools: {
						'TesseractOcr': {
							type: 'button',
							icon: 'https://upload.wikimedia.org/wikipedia/commons/thumb/1/11/Toolbaricon_TesseractOCR.png/120px-Toolbaricon_TesseractOCR.png',
							label: i18n[ 'button label' ],
							action: { type: 'callback', execute: doOcr }
						}
					}
				} );
				// Widen the button so the whole icon fits.
				$( 'a[rel="TesseractOcr"]' ).css( {
					backgroundSize: 'contain',
					width: '56px'
				} );
			} );
		} else if ( mw.toolbar ) {
			// Old-style toolbar: add an image button and wire up its click handler.
			mw.toolbar.addButton( {
				imageId: 'TesseractOcrButton',
				speedTip: i18n[ 'button label' ],
				imageFile: 'https://upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png'
			} );
			$( 'img#TesseractOcrButton' )
				.on( 'click', doOcr )
				.css( 'width', '50px' );
		}

		// Put a hidden copy of the loading gif into the page so that it is
		// already fetched when the progress message first appears.
		$( '<img>' )
			.attr( 'src', loadingGifUrl )
			.appendTo( 'body' )
			.hide();
	}

	/**
	 * This function is run when the OCR button is clicked. It looks up the page
	 * image, loads Tesseract.js and starts recognition on that image. Progress
	 * updates are passed to showProgressMsg and the final result to
	 * processOcrResult, which replaces the editbox's text.
	 *
	 * When no usable page image is found, the user is notified and nothing is
	 * loaded.
	 */
	function doOcr() {
		var $image = $( '.prp-page-image img' );
		var imageUrl = $image.attr( 'src' );

		if ( $image.length === 0 || !imageUrl ) {
			mw.notify( i18n[ 'image not found' ] );
			// Nothing to recognise; do not load Tesseract for a bogus URL.
			return;
		}

		// Protocol-relative URLs get an explicit HTTPS scheme before being handed
		// to the Tesseract worker; URLs that already carry a scheme are kept as is.
		if ( imageUrl.indexOf( '//' ) === 0 ) {
			imageUrl = 'https:' + imageUrl;
		}

		$.getScript( 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/tesseract.min.js', function () {
			// The Tesseract global is defined by the script that was just loaded.
			var worker = new Tesseract.TesseractWorker( {
				workerPath: 'https://tools-static.wmflabs.org/cdnjs/ajax/libs/tesseract.js/2.0.0-alpha.2/worker.min.js',
				langPath: 'https://tools.wmflabs.org/tessdata/4.0.0',
				corePath: 'https://tools.wmflabs.org/tessdata/core/tesseract-core.wasm.js'
			} );

			worker
				.recognize( imageUrl, language )
				.progress( showProgressMsg )
				.then( processOcrResult );
		} );
	}

	/**
	 * Handle the recognition result: replace the editbox's text with the
	 * recognised text, or notify the user when no text came back.
	 *
	 * @param {Object} result The result handed over by the recognition job. Only
	 *  its `text` property is read; a missing result or a missing, null or empty
	 *  `text` is reported as "no text".
	 */
	function processOcrResult( result ) {
		var text = result ? result.text : undefined;
		if ( text === undefined || text === null || text.length === 0 ) {
			mw.notify( i18n[ 'no text' ] );
			return;
		}
		$( '#wpTextbox1' ).val( text );
	}

	/**
	 * Show (or hide) a loading message directly before the editbox.
	 *
	 * The message box is created on first use and reused afterwards. It is shown
	 * while a status is reported with progress below 1, and hidden otherwise.
	 *
	 * @param {Object} data The progress object passed by the recognition job.
	 *  Its `status` (string) and `progress` (number) properties are read.
	 */
	function showProgressMsg( data ) {
		var loadingBoxId = 'TesseractOcrLoading';
		var $msgBox = $( '#' + loadingBoxId );
		var $loadingGif, msg;

		if ( $msgBox.length === 0 ) {
			$loadingGif = $( '<img>' )
				.attr( 'src', loadingGifUrl )
				.attr( 'alt', i18n[ 'loading indicator' ] )
				.css( {
					display: 'inline-block',
					margin: '0.3em'
				} );
			// Both children are prepended, so the gif ends up before the span.
			$msgBox = $( '<p>' )
				.attr( 'id', loadingBoxId )
				.css( {
					backgroundColor: '#efefef',
					border: '1px solid #ccc',
					display: 'none'
				} )
				.prepend( $( '<span>' ) )
				.prepend( $loadingGif );
			$( '#wpTextbox1' ).before( $msgBox );
		}

		// Add the new message if required.
		if ( data.status.length !== 0 && data.progress < 1 ) {
			// Fall back to the raw status string when there is no translation.
			msg = i18n[ data.status ] !== undefined ? i18n[ data.status ] : data.status;
			if ( data.progress > 0 ) {
				msg += ' (' + Math.round( data.progress * 100 ) + '%)';
			}
			$msgBox.find( 'span' ).text( msg );
			$msgBox.show();
		} else {
			$msgBox.hide();
		}
	}

	// Start initialisation as soon as this script is evaluated.
	run();
}( mediaWiki, jQuery ) );