18207 lines
		
	
	
		
			731 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			18207 lines
		
	
	
		
			731 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env perl
 | 
						|
# cloc -- Count Lines of Code                  {{{1
 | 
						|
# Copyright (C) 2006-2024 Al Danial <al.danial@gmail.com>
 | 
						|
# First release August 2006
 | 
						|
#
 | 
						|
# Includes code from:
 | 
						|
#   - SLOCCount v2.26
 | 
						|
#     http://www.dwheeler.com/sloccount/
 | 
						|
#     by David Wheeler.
 | 
						|
#   - Regexp::Common v2017060201
 | 
						|
#     https://metacpan.org/pod/Regexp::Common
 | 
						|
#     by Damian Conway and Abigail.
 | 
						|
#   - Win32::Autoglob 1.01
 | 
						|
#     https://metacpan.org/pod/Win32::Autoglob
 | 
						|
#     by Sean M. Burke.
 | 
						|
#   - Algorithm::Diff 1.1902
 | 
						|
#     https://metacpan.org/pod/Algorithm::Diff
 | 
						|
#     by Tye McQueen.
 | 
						|
#
 | 
						|
# This program is free software; you can redistribute it and/or modify
 | 
						|
# it under the terms of the GNU General Public License as published by
 | 
						|
# the Free Software Foundation; either version 2 of the License, or
 | 
						|
# (at your option) any later version.
 | 
						|
#
 | 
						|
# This program is distributed in the hope that it will be useful,
 | 
						|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
# GNU General Public License for more details:
 | 
						|
# <http://www.gnu.org/licenses/gpl.txt>.
 | 
						|
#
 | 
						|
# 1}}}
 | 
						|
my $VERSION = "2.01";  # odd number == beta; even number == stable
 | 
						|
my $URL     = "github.com/AlDanial/cloc";  # 'https://' pushes header too wide
 | 
						|
require 5.10.0;
 | 
						|
# use modules                                  {{{1
 | 
						|
use warnings;
 | 
						|
use strict;
 | 
						|
 | 
						|
use Getopt::Long;
 | 
						|
use File::Basename;
 | 
						|
use File::Temp qw { tempfile tempdir };
 | 
						|
use File::Find;
 | 
						|
use File::Path;
 | 
						|
use File::Spec;
 | 
						|
use IO::File;
 | 
						|
use List::Util qw( min max );
 | 
						|
use Cwd;
 | 
						|
use POSIX qw { strftime ceil};
 | 
						|
# Parallel::ForkManager isn't in the standard distribution.
 | 
						|
# Use it only if installed, and only if --processes=N is given.
 | 
						|
# The module load happens in get_max_processes().
 | 
						|
my $HAVE_Parallel_ForkManager = 0;
 | 
						|
 | 
						|
# Digest::MD5 isn't in the standard distribution. Use it only if installed.
 | 
						|
my $HAVE_Digest_MD5 = 0;
 | 
						|
eval "use Digest::MD5;";
 | 
						|
if (defined $Digest::MD5::VERSION) {
 | 
						|
    $HAVE_Digest_MD5 = 1;
 | 
						|
} else {
 | 
						|
    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
 | 
						|
}
 | 
						|
 | 
						|
# Time::HiRes became standard with Perl 5.8
 | 
						|
my $HAVE_Time_HiRes = 0;
 | 
						|
eval "use Time::HiRes;";
 | 
						|
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
 | 
						|
 | 
						|
my $HAVE_Rexexp_Common;
 | 
						|
# Regexp::Common isn't in the standard distribution.  It will
 | 
						|
# be installed in a temp directory if necessary.
 | 
						|
eval "use Regexp::Common qw ( comment ) ";
 | 
						|
if (defined $Regexp::Common::VERSION) {
 | 
						|
    $HAVE_Rexexp_Common = 1;
 | 
						|
} else {
 | 
						|
    $HAVE_Rexexp_Common = 0;
 | 
						|
}
 | 
						|
 | 
						|
my $HAVE_Algorithm_Diff = 0;
 | 
						|
# Algorithm::Diff isn't in the standard distribution.  It will
 | 
						|
# be installed in a temp directory if necessary.
 | 
						|
eval "use Algorithm::Diff qw ( sdiff ) ";
 | 
						|
if (defined $Algorithm::Diff::VERSION) {
 | 
						|
    $HAVE_Algorithm_Diff = 1;
 | 
						|
} else {
 | 
						|
    Install_Algorithm_Diff();
 | 
						|
}
 | 
						|
 | 
						|
# print "2 HAVE_Algorithm_Diff = $HAVE_Algorithm_Diff\n";
 | 
						|
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
 | 
						|
# die "Hre=$HAVE_Rexexp_Common  Had=$HAVE_Algorithm_Diff";
 | 
						|
 | 
						|
# Uncomment next two lines when building Windows executable with perl2exe
 | 
						|
# or if running on a system that already has Regexp::Common.
 | 
						|
#use Regexp::Common;
 | 
						|
#$HAVE_Rexexp_Common = 1;
 | 
						|
 | 
						|
#perl2exe_include "Regexp/Common/whitespace.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/fax.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/file.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/ftp.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/gopher.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/http.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/pop.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/prospero.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/news.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/tel.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/telnet.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/tv.pm"
 | 
						|
#perl2exe_include "Regexp/Common/URI/wais.pm"
 | 
						|
#perl2exe_include "Regexp/Common/CC.pm"
 | 
						|
#perl2exe_include "Regexp/Common/SEN.pm"
 | 
						|
#perl2exe_include "Regexp/Common/number.pm"
 | 
						|
#perl2exe_include "Regexp/Common/delimited.pm"
 | 
						|
#perl2exe_include "Regexp/Common/profanity.pm"
 | 
						|
#perl2exe_include "Regexp/Common/net.pm"
 | 
						|
#perl2exe_include "Regexp/Common/zip.pm"
 | 
						|
#perl2exe_include "Regexp/Common/comment.pm"
 | 
						|
#perl2exe_include "Regexp/Common/balanced.pm"
 | 
						|
#perl2exe_include "Regexp/Common/lingua.pm"
 | 
						|
#perl2exe_include "Regexp/Common/list.pm"
 | 
						|
#perl2exe_include "File/Glob.pm"
 | 
						|
 | 
						|
use Text::Tabs qw { expand };
 | 
						|
use Cwd qw { cwd };
 | 
						|
use File::Glob;
 | 
						|
# 1}}}
 | 
						|
# Usage information, options processing.       {{{1
 | 
						|
my $ON_WINDOWS = 0;
 | 
						|
   $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
 | 
						|
if ($ON_WINDOWS and $ENV{'SHELL'}) {
 | 
						|
    if ($ENV{'SHELL'} =~ m{^/}) {
 | 
						|
        $ON_WINDOWS = 0;  # make Cygwin look like Unix
 | 
						|
    } else {
 | 
						|
        $ON_WINDOWS = 1;  # MKS defines $SHELL but still acts like Windows
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
my $HAVE_Win32_Long_Path = 0;
 | 
						|
# Win32::LongPath is an optional dependency that when available on
 | 
						|
# Windows will be used to support reading files past the 255 char
 | 
						|
# path length limit.
 | 
						|
if ($ON_WINDOWS) {
 | 
						|
    eval "use Win32::LongPath;";
 | 
						|
    if (defined $Win32::LongPath::VERSION) {
 | 
						|
        $HAVE_Win32_Long_Path = 1;
 | 
						|
    }
 | 
						|
}
 | 
						|
my $config_file = '';
 | 
						|
if ( $ENV{'HOME'} ) {
 | 
						|
    $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt');
 | 
						|
} elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) {
 | 
						|
    $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc');
 | 
						|
}
 | 
						|
# $config_file may be updated by check_alternate_config_files()
 | 
						|
 | 
						|
my $NN     = chr(27) . "[0m";  # normal
 | 
						|
   $NN     = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT:  is it a terminal?
 | 
						|
my $BB     = chr(27) . "[1m";  # bold
 | 
						|
   $BB     = "" if $ON_WINDOWS or !(-t STDOUT);
 | 
						|
my $script = basename $0;
 | 
						|
 | 
						|
#  Intended for v1.88:
 | 
						|
#  --git-diff-simindex       Git diff strategy #3:  use git's similarity index
 | 
						|
#                            (git diff -M --name-status) to identify file pairs
 | 
						|
#                            to compare.  This is especially useful to compare
 | 
						|
#                            files that were renamed between the commits.
 | 
						|
 | 
						|
my $brief_usage  = "
 | 
						|
                       cloc -- Count Lines of Code
 | 
						|
 | 
						|
Usage:
 | 
						|
    $script [options] <file(s)/dir(s)/git hash(es)>
 | 
						|
        Count physical lines of source code and comments in the given files
 | 
						|
        (may be archives such as compressed tarballs or zip files) and/or
 | 
						|
        recursively below the given directories or git commit hashes.
 | 
						|
        Example:    cloc src/ include/ main.c
 | 
						|
 | 
						|
    $script [options] --diff <set1>  <set2>
 | 
						|
        Compute differences of physical lines of source code and comments
 | 
						|
        between any pairwise combination of directory names, archive
 | 
						|
        files or git commit hashes.
 | 
						|
        Example:    cloc --diff Python-3.5.tar.xz python-3.6/
 | 
						|
 | 
						|
$script --help  shows full documentation on the options.
 | 
						|
https://$URL has numerous examples and more information.
 | 
						|
";
 | 
						|
my $usage  = "
 | 
						|
Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>
 | 
						|
 | 
						|
 Count, or compute differences of, physical lines of source code in the
 | 
						|
 given files (may be archives such as compressed tarballs or zip files,
 | 
						|
 or git commit hashes or branch names) and/or recursively below the
 | 
						|
 given directories.
 | 
						|
 | 
						|
 ${BB}Input Options${NN}
 | 
						|
   --extract-with=<cmd>      This option is only needed if cloc is unable
 | 
						|
                             to figure out how to extract the contents of
 | 
						|
                             the input file(s) by itself.
 | 
						|
                             Use <cmd> to extract binary archive files (e.g.:
 | 
						|
                             .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
 | 
						|
                             a stand-in for the actual file(s) to be
 | 
						|
                             extracted.  For example, to count lines of code
 | 
						|
                             in the input files
 | 
						|
                                gcc-4.2.tar.gz  perl-5.8.8.tar.gz
 | 
						|
                             on Unix use
 | 
						|
                               --extract-with='gzip -dc >FILE< | tar xf -'
 | 
						|
                             or, if you have GNU tar,
 | 
						|
                               --extract-with='tar zxf >FILE<'
 | 
						|
                             and on Windows use, for example:
 | 
						|
                               --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
 | 
						|
                             (if WinZip is installed there).
 | 
						|
   --list-file=<file>        Take the list of file and/or directory names to
 | 
						|
                             process from <file>, which has one file/directory
 | 
						|
                             name per line.  Only exact matches are counted;
 | 
						|
                             relative path names will be resolved starting from
 | 
						|
                             the directory where cloc is invoked.  Set <file>
 | 
						|
                             to - to read file names from a STDIN pipe.
 | 
						|
                             See also --exclude-list-file, --config.
 | 
						|
   --diff-list-file=<file>   Take the pairs of file names to be diff'ed from
 | 
						|
                             <file>, whose format matches the output of
 | 
						|
                             --diff-alignment.  (Run with that option to
 | 
						|
                             see a sample.)  The language identifier at the
 | 
						|
                             end of each line is ignored.  This enables --diff
 | 
						|
                             mode and bypasses file pair alignment logic.
 | 
						|
                             Use --diff-list-files to define the file name
 | 
						|
                             pairs in separate files. See also --config.
 | 
						|
   --diff-list-files <file1> <file2>
 | 
						|
                             Compute differences in code and comments between
 | 
						|
                             the files and directories listed in <file1> and
 | 
						|
                             <file2>.  Each input file should use the same
 | 
						|
                             format as --list-file, where there is one file or
 | 
						|
                             directory name per line.  Only exact matches are
 | 
						|
                             counted; relative path names will be resolved
 | 
						|
                             starting from the directory where cloc is invoked.
 | 
						|
                             This enables --diff mode.  See also --list-file,
 | 
						|
                             --diff-list-file, --diff.
 | 
						|
   --vcs=<VCS>               Invoke a system call to <VCS> to obtain a list of
 | 
						|
                             files to work on.  If <VCS> is 'git', then will
 | 
						|
                             invoke 'git ls-files' to get a file list and
 | 
						|
                             'git submodule status' to get a list of submodules
 | 
						|
                             whose contents will be ignored.  See also --git
 | 
						|
                             which accepts git commit hashes and branch names.
 | 
						|
                             If <VCS> is 'svn' then will invoke 'svn list -R'.
 | 
						|
                             The primary benefit is that cloc will then skip
 | 
						|
                             files explicitly excluded by the versioning tool
 | 
						|
                             in question, ie, those in .gitignore or have the
 | 
						|
                             svn:ignore property.
 | 
						|
                             Alternatively <VCS> may be any system command
 | 
						|
                             that generates a list of files.
 | 
						|
                             Note:  cloc must be in a directory which can read
 | 
						|
                             the files as they are returned by <VCS>.  cloc will
 | 
						|
                             not download files from remote repositories.
 | 
						|
                             'svn list -R' may refer to a remote repository
 | 
						|
                             to obtain file names (and therefore may require
 | 
						|
                             authentication to the remote repository), but
 | 
						|
                             the files themselves must be local.
 | 
						|
                             Setting <VCS> to 'auto' selects between 'git'
 | 
						|
                             and 'svn' (or neither) depending on the presence
 | 
						|
                             of a .git or .svn subdirectory below the directory
 | 
						|
                             where cloc is invoked.
 | 
						|
   --unicode                 Check binary files to see if they contain Unicode
 | 
						|
                             expanded ASCII text.  This causes performance to
 | 
						|
                             drop noticeably.
 | 
						|
 | 
						|
 ${BB}Processing Options${NN}
 | 
						|
   --autoconf                Count .in files (as processed by GNU autoconf) of
 | 
						|
                             recognized languages.  See also --no-autogen.
 | 
						|
   --by-file                 Report results for every source file encountered.
 | 
						|
                             See also --fmt under 'Output Options'.
 | 
						|
   --by-file-by-lang         Report results for every source file encountered
 | 
						|
                             in addition to reporting by language.
 | 
						|
   --config <file>           Read command line switches from <file> instead of
 | 
						|
                             the default location of $config_file.
 | 
						|
                             The file should contain one switch, along with
 | 
						|
                             arguments (if any), per line.  Blank lines and lines
 | 
						|
                             beginning with '#' are skipped.  Options given on
 | 
						|
                             the command line take priority over entries read from
 | 
						|
                             the file.
 | 
						|
                             If a directory is also given with any of these
 | 
						|
                             switches: --list-file, --exclude-list-file,
 | 
						|
                             --read-lang-def, --force-lang-def, --diff-list-file
 | 
						|
                             and a config file exists in that directory, it will
 | 
						|
                             take priority over $config_file.
 | 
						|
   --count-and-diff <set1> <set2>
 | 
						|
                             First perform direct code counts of source file(s)
 | 
						|
                             of <set1> and <set2> separately, then perform a diff
 | 
						|
                             of these.  Inputs may be pairs of files, directories,
 | 
						|
                             or archives.  If --out or --report-file is given,
 | 
						|
                             three output files will be created, one for each
 | 
						|
                             of the two counts and one for the diff.  See also
 | 
						|
                             --diff, --diff-alignment, --diff-timeout,
 | 
						|
                             --ignore-case, --ignore-whitespace.
 | 
						|
   --diff <set1> <set2>      Compute differences in code and comments between
 | 
						|
                             source file(s) of <set1> and <set2>.  The inputs
 | 
						|
                             may be any mix of files, directories, archives,
 | 
						|
                             or git commit hashes.  Use --diff-alignment to
 | 
						|
                             generate a list showing which file pairs where
 | 
						|
                             compared.  When comparing git branches, only files
 | 
						|
                             which have changed in either commit are compared.
 | 
						|
                             See also --git, --count-and-diff, --diff-alignment,
 | 
						|
                             --diff-list-file, --diff-timeout, --ignore-case,
 | 
						|
                             --ignore-whitespace.
 | 
						|
   --diff-timeout <N>        Ignore files which take more than <N> seconds
 | 
						|
                             to process.  Default is 10 seconds.  Setting <N>
 | 
						|
                             to 0 allows unlimited time.  (Large files with many
 | 
						|
                             repeated lines can cause Algorithm::Diff::sdiff()
 | 
						|
                             to take hours.) See also --timeout.
 | 
						|
   --docstring-as-code       cloc considers docstrings to be comments, but this is
 | 
						|
                             not always correct as docstrings represent regular
 | 
						|
                             strings when they appear on the right hand side of an
 | 
						|
                             assignment or as function arguments.  This switch
 | 
						|
                             forces docstrings to be counted as code.
 | 
						|
   --follow-links            [Unix only] Follow symbolic links to directories
 | 
						|
                             (sym links to files are always followed).
 | 
						|
                             See also --stat.
 | 
						|
   --force-lang=<lang>[,<ext>]
 | 
						|
                             Process all files that have a <ext> extension
 | 
						|
                             with the counter for language <lang>.  For
 | 
						|
                             example, to count all .f files with the
 | 
						|
                             Fortran 90 counter (which expects files to
 | 
						|
                             end with .f90) instead of the default Fortran 77
 | 
						|
                             counter, use
 | 
						|
                               --force-lang=\"Fortran 90,f\"
 | 
						|
                             If <ext> is omitted, every file will be counted
 | 
						|
                             with the <lang> counter.  This option can be
 | 
						|
                             specified multiple times (but that is only
 | 
						|
                             useful when <ext> is given each time).
 | 
						|
                             See also --script-lang, --lang-no-ext.
 | 
						|
   --force-lang-def=<file>   Load language processing filters from <file>,
 | 
						|
                             then use these filters instead of the built-in
 | 
						|
                             filters.  Note:  languages which map to the same
 | 
						|
                             file extension (for example:
 | 
						|
                             MATLAB/Mathematica/Objective-C/MUMPS/Mercury;
 | 
						|
                             Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
 | 
						|
                             will be ignored as these require additional
 | 
						|
                             processing that is not expressed in language
 | 
						|
                             definition files.  Use --read-lang-def to define
 | 
						|
                             new language filters without replacing built-in
 | 
						|
                             filters (see also --write-lang-def,
 | 
						|
                             --write-lang-def-incl-dup, --config).
 | 
						|
   --git                     Forces the inputs to be interpreted as git targets
 | 
						|
                             (commit hashes, branch names, et cetera) if these
 | 
						|
                             are not first identified as file or directory
 | 
						|
                             names.  This option overrides the --vcs=git logic
 | 
						|
                             if this is given; in other words, --git gets its
 | 
						|
                             list of files to work on directly from git using
 | 
						|
                             the hash or branch name rather than from
 | 
						|
                             'git ls-files'.  This option can be used with
 | 
						|
                             --diff to perform line count diffs between git
 | 
						|
                             commits, or between a git commit and a file,
 | 
						|
                             directory, or archive.  Use -v/--verbose to see
 | 
						|
                             the git system commands cloc issues.
 | 
						|
   --git-diff-rel            Same as --git --diff, or just --diff if the inputs
 | 
						|
                             are recognized as git targets.  Only files which
 | 
						|
                             have changed in either commit are compared.
 | 
						|
   --git-diff-all            Git diff strategy #2:  compare all files in the
 | 
						|
                             repository between the two commits.
 | 
						|
   --ignore-whitespace       Ignore horizontal white space when comparing files
 | 
						|
                             with --diff.  See also --ignore-case.
 | 
						|
   --ignore-case             Ignore changes in case within file contents;
 | 
						|
                             consider upper- and lowercase letters equivalent
 | 
						|
                             when comparing files with --diff.  See also
 | 
						|
                             --ignore-whitespace.
 | 
						|
   --ignore-case-ext         Ignore case of file name extensions.  This will
 | 
						|
                             cause problems counting some languages
 | 
						|
                             (specifically, .c and .C are associated with C and
 | 
						|
                             C++; this switch would count .C files as C rather
 | 
						|
                             than C++ on *nix operating systems).  File name
 | 
						|
                             case insensitivity is always true on Windows.
 | 
						|
   --lang-no-ext=<lang>      Count files without extensions using the <lang>
 | 
						|
                             counter.  This option overrides internal logic
 | 
						|
                             for files without extensions (where such files
 | 
						|
                             are checked against known scripting languages
 | 
						|
                             by examining the first line for #!).  See also
 | 
						|
                             --force-lang, --script-lang.
 | 
						|
   --max-file-size=<MB>      Skip files larger than <MB> megabytes when
 | 
						|
                             traversing directories.  By default, <MB>=100.
 | 
						|
                             cloc's memory requirement is roughly twenty times
 | 
						|
                             larger than the largest file so running with
 | 
						|
                             files larger than 100 MB on a computer with less
 | 
						|
                             than 2 GB of memory will cause problems.
 | 
						|
                             Note:  this check does not apply to files
 | 
						|
                             explicitly passed as command line arguments.
 | 
						|
   --no-autogen[=list]       Ignore files generated by code-production systems
 | 
						|
                             such as GNU autoconf.  To see a list of these files
 | 
						|
                             (then exit), run with --no-autogen list
 | 
						|
                             See also --autoconf.
 | 
						|
   --no-recurse              Count files in the given directories without
 | 
						|
                             recursively descending below them.
 | 
						|
   --original-dir            [Only effective in combination with
 | 
						|
                             --strip-comments or --strip-code]  Write the stripped
 | 
						|
                             files to the same directory as the original files.
 | 
						|
   --only-count-files        Only count files by language.  Blank, comment, and
 | 
						|
                             code counts will be zero.
 | 
						|
   --read-binary-files       Process binary files in addition to text files.
 | 
						|
                             This is usually a bad idea and should only be
 | 
						|
                             attempted with text files that have embedded
 | 
						|
                             binary data.
 | 
						|
   --read-lang-def=<file>    Load new language processing filters from <file>
 | 
						|
                             and merge them with those already known to cloc.
 | 
						|
                             If <file> defines a language cloc already knows
 | 
						|
                             about, cloc's definition will take precedence.
 | 
						|
                             Use --force-lang-def to over-ride cloc's
 | 
						|
                             definitions (see also --write-lang-def,
 | 
						|
                             --write-lang-def-incl-dup, --config).
 | 
						|
   --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
 | 
						|
                             scripting language with the counter for language
 | 
						|
                             <lang>.  For example, files that begin with
 | 
						|
                                #!/usr/local/bin/perl5.8.8
 | 
						|
                             will be counted with the Perl counter by using
 | 
						|
                                --script-lang=Perl,perl5.8.8
 | 
						|
                             The language name is case insensitive but the
 | 
						|
                             name of the script language executable, <s>,
 | 
						|
                             must have the right case.  This option can be
 | 
						|
                             specified multiple times.  See also --force-lang,
 | 
						|
                             --lang-no-ext.
 | 
						|
   --sdir=<dir>              Use <dir> as the scratch directory instead of
 | 
						|
                             letting File::Temp chose the location.  Files
 | 
						|
                             written to this location are not removed at
 | 
						|
                             the end of the run (as they are with File::Temp).
 | 
						|
   --skip-leading=<N[,ext]>  Skip the first <N> lines of each file.  If a
 | 
						|
                             comma separated list of extensions is also given,
 | 
						|
                             only skip lines from those file types.  Example:
 | 
						|
                               --skip-leading=10,cpp,h
 | 
						|
                             will skip the first ten lines of *.cpp and *.h
 | 
						|
                             files.  This is useful for ignoring boilerplate
 | 
						|
                             text.
 | 
						|
   --skip-uniqueness         Skip the file uniqueness check.  This will give
 | 
						|
                             a performance boost at the expense of counting
 | 
						|
                             files with identical contents multiple times
 | 
						|
                             (if such duplicates exist).
 | 
						|
   --stat                    Some file systems (AFS, CD-ROM, FAT, HPFS, SMB)
 | 
						|
                             do not have directory 'nlink' counts that match
 | 
						|
                             the number of its subdirectories.  Consequently
 | 
						|
                             cloc may undercount or completely skip the
 | 
						|
                             contents of such file systems.  This switch forces
 | 
						|
                             File::Find to stat directories to obtain the
 | 
						|
                             correct count.  File search speed will decrease.
 | 
						|
                             See also --follow-links.
 | 
						|
   --stdin-name=<file>       Give a file name to use to determine the language
 | 
						|
                             for standard input.  (Use - as the input name to
 | 
						|
                             receive source code via STDIN.)
 | 
						|
   --strip-code=<ext>        For each file processed, write to the current
 | 
						|
                             directory a version of the file which has blank
 | 
						|
                             and code lines, including code with (in-line
 | 
						|
                             comments) removed. The name of each stripped file
 | 
						|
                             is the original file name with .<ext> appended to
 | 
						|
                             it. It is written to the current directory unless
 | 
						|
                             --original-dir is on.
 | 
						|
   --strip-comments=<ext>    For each file processed, write to the current
 | 
						|
                             directory a version of the file which has blank
 | 
						|
                             and commented lines removed (in-line comments
 | 
						|
                             persist).  The name of each stripped file is the
 | 
						|
                             original file name with .<ext> appended to it.
 | 
						|
                             It is written to the current directory unless
 | 
						|
                             --original-dir is on.
 | 
						|
   --strip-str-comments      Replace comment markers embedded in strings with
 | 
						|
                             'xx'.  This attempts to work around a limitation
 | 
						|
                             in Regexp::Common::Comment where comment markers
 | 
						|
                             embedded in strings are seen as actual comment
 | 
						|
                             markers and not strings, often resulting in a
 | 
						|
                             'Complex regular subexpression recursion limit'
 | 
						|
                             warning and incorrect counts.  There are two
 | 
						|
                             disadvantages to using this switch:  1/code count
 | 
						|
                             performance drops, and 2/code generated with
 | 
						|
                             --strip-comments will contain different strings
 | 
						|
                             where ever embedded comments are found.
 | 
						|
   --sum-reports             Input arguments are report files previously
 | 
						|
                             created with the --report-file option in plain
 | 
						|
                             format (eg. not JSON, YAML, XML, or SQL).
 | 
						|
                             Makes a cumulative set of results containing the
 | 
						|
                             sum of data from the individual report files.
 | 
						|
   --timeout <N>             Ignore files which take more than <N> seconds
 | 
						|
                             to process at any of the language's filter stages.
 | 
						|
                             The default maximum number of seconds spent on a
 | 
						|
                             filter stage is the number of lines in the file
 | 
						|
                             divided by one thousand.  Setting <N> to 0 allows
 | 
						|
                             unlimited time.  See also --diff-timeout.
 | 
						|
   --processes=NUM           [Available only on systems with a recent version
 | 
						|
                             of the Parallel::ForkManager module.  Not
 | 
						|
                             available on Windows.] Sets the maximum number of
 | 
						|
                             cores that cloc uses.  The default value of 0
 | 
						|
                             disables multiprocessing.
 | 
						|
   --unix                    Override the operating system autodetection
 | 
						|
                             logic and run in UNIX mode.  See also
 | 
						|
                             --windows, --show-os.
 | 
						|
   --use-sloccount           If SLOCCount is installed, use its compiled
 | 
						|
                             executables c_count, java_count, pascal_count,
 | 
						|
                             php_count, and xml_count instead of cloc's
 | 
						|
                             counters.  SLOCCount's compiled counters are
 | 
						|
                             substantially faster than cloc's and may give
 | 
						|
                             a performance improvement when counting projects
 | 
						|
                             with large files.  However, these cloc-specific
 | 
						|
                             features will not be available: --diff,
 | 
						|
                             --count-and-diff, --strip-code, --strip-comments,
 | 
						|
                             --unicode.
 | 
						|
   --windows                 Override the operating system autodetection
 | 
						|
                             logic and run in Microsoft Windows mode.
 | 
						|
                             See also --unix, --show-os.
 | 
						|
 | 
						|
 ${BB}Filter Options${NN}
 | 
						|
   --include-content=<regex> Only count files containing text that matches the
 | 
						|
                             given regular expression.
 | 
						|
   --exclude-content=<regex> Exclude files containing text that matches the given
 | 
						|
                             regular expression.
 | 
						|
   --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
 | 
						|
                             D1, D2, D3, et cetera, from being scanned.  For
 | 
						|
                             example  --exclude-dir=.cache,test  will skip
 | 
						|
                             all files and subdirectories that have /.cache/
 | 
						|
                             or /test/ as their parent directory.
 | 
						|
                             Directories named .bzr, .cvs, .hg, .git, .svn,
 | 
						|
                             and .snapshot are always excluded.
 | 
						|
                             This option only works with individual directory
 | 
						|
                             names so including file path separators is not
 | 
						|
                             allowed.  Use --fullpath and --not-match-d=<regex>
 | 
						|
                             to supply a regex matching multiple subdirectories.
 | 
						|
   --exclude-ext=<ext1>[,<ext2>[...]]
 | 
						|
                             Do not count files having the given file name
 | 
						|
                             extensions.
 | 
						|
   --exclude-lang=<L1>[,L2[...]]
 | 
						|
                             Exclude the given comma separated languages
 | 
						|
                             L1, L2, L3, et cetera, from being counted.
 | 
						|
   --exclude-list-file=<file>  Ignore files and/or directories whose names
 | 
						|
                             appear in <file>.  <file> should have one file
 | 
						|
                             name per line.  Only exact matches are ignored;
 | 
						|
                             relative path names will be resolved starting from
 | 
						|
                             the directory where cloc is invoked.
 | 
						|
                             See also --list-file, --config.
 | 
						|
   --fullpath                Modifies the behavior of --match-f, --not-match-f,
 | 
						|
                             and --not-match-d to include the file's path--
 | 
						|
                             relative to the directory from which cloc is
 | 
						|
                             invoked--in the regex, not just the file's basename.
 | 
						|
                             (This does not expand each filename to include its
 | 
						|
                             fully qualified absolute path; instead, it uses as
 | 
						|
                             much of the path as is passed in to cloc.)
 | 
						|
   --include-ext=<ext1>[,ext2[...]]
 | 
						|
                             Count only languages having the given comma
 | 
						|
                             separated file extensions.  Use --show-ext to
 | 
						|
                             see the recognized extensions.
 | 
						|
   --include-lang=<L1>[,L2[...]]
 | 
						|
                             Count only the given comma separated, case-
 | 
						|
                             insensitive languages L1, L2, L3, et cetera.  Use
 | 
						|
                             --show-lang to see the list of recognized languages.
 | 
						|
   --match-d=<regex>         Only count files in directories matching the Perl
 | 
						|
                             regex.  For example
 | 
						|
                               --match-d='/(src|include)/'
 | 
						|
                             only counts files in directories containing
 | 
						|
                             /src/ or /include/.  Unlike --not-match-d,
 | 
						|
                             --match-f, and --not-match-f, --match-d always
 | 
						|
                             anchors the regex to the directory from which
 | 
						|
                             cloc is invoked.
 | 
						|
   --not-match-d=<regex>     Count all files except those in directories
 | 
						|
                             matching the Perl regex.  Only the trailing
 | 
						|
                             directory name is compared, for example, when
 | 
						|
                             counting in /usr/local/lib, only 'lib' is
 | 
						|
                             compared to the regex.
 | 
						|
                             Add --fullpath to compare parent directories, beginning
 | 
						|
                             from the directory where cloc is invoked, to the regex.
 | 
						|
                             Do not include file path separators at the beginning
 | 
						|
                             or end of the regex. This option may be repeated.
 | 
						|
   --match-f=<regex>         Only count files whose basenames match the Perl
 | 
						|
                             regex.  For example
 | 
						|
                               --match-f='^[Ww]idget'
 | 
						|
                             only counts files that start with Widget or widget.
 | 
						|
                             Add --fullpath to include parent directories
 | 
						|
                             in the regex instead of just the basename.
 | 
						|
   --not-match-f=<regex>     Count all files except those whose basenames
 | 
						|
                             match the Perl regex.  Add --fullpath to include
 | 
						|
                             parent directories in the regex instead of just
 | 
						|
                             the basename. This option may be repeated.
 | 
						|
   --skip-archive=<regex>    Ignore files that end with the given Perl regular
 | 
						|
                             expression.  For example, if given
 | 
						|
                               --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
 | 
						|
                             the code will skip files that end with .zip,
 | 
						|
                             .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
 | 
						|
                             .tar.7z.
 | 
						|
   --skip-win-hidden         On Windows, ignore hidden files.
 | 
						|
 | 
						|
 ${BB}Debug Options${NN}
 | 
						|
   --categorized=<file>      Save file sizes in bytes, identified languages
 | 
						|
                             and names of categorized files to <file>.
 | 
						|
   --counted=<file>          Save names of processed source files to <file>.
 | 
						|
   --diff-alignment=<file>   Write to <file> a list of files and file pairs
 | 
						|
                             showing which files were added, removed, and/or
 | 
						|
                             compared during a run with --diff.  This switch
 | 
						|
                             forces the --diff mode on.
 | 
						|
   --explain=<lang>          Print the filters used to remove comments for
 | 
						|
                             language <lang> and exit.  In some cases the
 | 
						|
                             filters refer to Perl subroutines rather than
 | 
						|
                             regular expressions.  An examination of the
 | 
						|
                             source code may be needed for further explanation.
 | 
						|
   --help                    Print this usage information and exit.
 | 
						|
   --found=<file>            Save names of every file found to <file>.
 | 
						|
   --ignored=<file>          Save names of ignored files and the reason they
 | 
						|
                             were ignored to <file>.
 | 
						|
   --print-filter-stages     Print processed source code before and after
 | 
						|
                             each filter is applied.
 | 
						|
   --show-ext[=<ext>]        Print information about all known (or just the
 | 
						|
                             given) file extensions and exit.
 | 
						|
   --show-lang[=<lang>]      Print information about all known (or just the
 | 
						|
                             given) languages and exit.
 | 
						|
   --show-os                 Print the value of the operating system mode
 | 
						|
                             and exit.  See also --unix, --windows.
 | 
						|
   -v[=<n>]                  Verbose switch (optional numeric value).
 | 
						|
   -verbose[=<n>]            Long form of -v.
 | 
						|
   --version                 Print the version of this program and exit.
 | 
						|
   --write-lang-def=<file>   Writes to <file> the language processing filters
 | 
						|
                             then exits.  Useful as a first step to creating
 | 
						|
                             custom language definitions. Note: languages which
 | 
						|
                             map to the same file extension will be excluded.
 | 
						|
                             (See also --force-lang-def, --read-lang-def).
 | 
						|
   --write-lang-def-incl-dup=<file>
 | 
						|
                             Same as --write-lang-def, but includes duplicated
 | 
						|
                             extensions.  This generates a problematic language
 | 
						|
                             definition file because cloc will refuse to use
 | 
						|
                             it until duplicates are removed.
 | 
						|
 | 
						|
 ${BB}Output Options${NN}
 | 
						|
   --3                       Print third-generation language output.
 | 
						|
                             (This option can cause report summation to fail
 | 
						|
                             if some reports were produced with this option
 | 
						|
                             while others were produced without it.)
 | 
						|
   --by-percent  X           Instead of comment and blank line counts, show
 | 
						|
                             these values as percentages based on the value
 | 
						|
                             of X in the denominator, where X is
 | 
						|
                                 c    meaning lines of code
 | 
						|
                                 cm   meaning lines of code + comments
 | 
						|
                                 cb   meaning lines of code + blanks
 | 
						|
                                 cmb  meaning lines of code + comments + blanks
 | 
						|
                             For example, if using method 'c' and your code
 | 
						|
                             has twice as many lines of comments as lines
 | 
						|
                             of code, the value in the comment column will
 | 
						|
                             be 200%.  The code column remains a line count.
 | 
						|
   --csv                     Write the results as comma separated values.
 | 
						|
   --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
 | 
						|
                             separated files instead of ,.  This switch forces
 | 
						|
   --file-encoding=<E>       Write output files using the <E> encoding instead of
 | 
						|
                             the default ASCII (<E> = 'UTF-7').  Examples: 'UTF-16',
 | 
						|
                             'euc-kr', 'iso-8859-16'.  Known encodings can be
 | 
						|
                             printed with
 | 
						|
                               perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"'
 | 
						|
   --fmt=<N>                 Alternate text output format where <N> is a number
 | 
						|
                             from 1 to 5, or -1 to -5. 'total lines' means the
 | 
						|
                             sum of code, comment, and blank lines.  Negative
 | 
						|
                             values are the same as the positive values but retain,
 | 
						|
                             instead of deleting, the intermediate JSON file that
 | 
						|
                             is written.  The JSON file name is randomly generated
 | 
						|
                             unless --out/--report-file is given.  The formats are:
 | 
						|
                               1:  by language (same as cloc default output)
 | 
						|
                               2:  by language with an extra column for total lines
 | 
						|
                               3:  by file with language
 | 
						|
                               4:  by file with a total lines column
 | 
						|
                               5:  by file with language and a total lines column
 | 
						|
   --hide-rate               Do not show elapsed time, line processing rate, or
 | 
						|
                             file processing rates in the output header. This
 | 
						|
                             makes output deterministic.
 | 
						|
   --json                    Write the results as JavaScript Object Notation
 | 
						|
                             (JSON) formatted output.
 | 
						|
   --md                      Write the results as Markdown-formatted text.
 | 
						|
   --out=<file>              Synonym for --report-file=<file>.
 | 
						|
   --progress-rate=<n>       Show progress update after every <n> files are
 | 
						|
                             processed (default <n>=100).  Set <n> to 0 to
 | 
						|
                             suppress progress output (useful when redirecting
 | 
						|
                             output to STDOUT).
 | 
						|
   --quiet                   Suppress all information messages except for
 | 
						|
                             the final report.
 | 
						|
   --report-file=<file>      Write the results to <file> instead of STDOUT.
 | 
						|
   --summary-cutoff=X:N      Aggregate to 'Other' results having X lines
 | 
						|
                             below N where X is one of
 | 
						|
                                c   meaning lines of code
 | 
						|
                                f   meaning files
 | 
						|
                                m   meaning lines of comments
 | 
						|
                                cm  meaning lines of code + comments
 | 
						|
                             Appending a percent sign to N changes
 | 
						|
                             the calculation from straight count to
 | 
						|
                             percentage.
 | 
						|
                             Ignored with --diff or --by-file.
 | 
						|
   --sql=<file>              Write results as SQL create and insert statements
 | 
						|
                             which can be read by a database program such as
 | 
						|
                             SQLite.  If <file> is -, output is sent to STDOUT.
 | 
						|
   --sql-append              Append SQL insert statements to the file specified
 | 
						|
                             by --sql and do not generate table creation
 | 
						|
                             statements.  Only valid with the --sql option.
 | 
						|
   --sql-project=<name>      Use <name> as the project identifier for the
 | 
						|
                             current run.  Only valid with the --sql option.
 | 
						|
   --sql-style=<style>       Write SQL statements in the given style instead
 | 
						|
                             of the default SQLite format.  Styles include
 | 
						|
                             'Oracle' and 'Named_Columns'.
 | 
						|
   --sum-one                 For plain text reports, show the SUM: output line
 | 
						|
                             even if only one input file is processed.
 | 
						|
   --xml                     Write the results in XML.
 | 
						|
   --xsl=<file>              Reference <file> as an XSL stylesheet within
 | 
						|
                             the XML output.  If <file> is 1 (numeric one),
 | 
						|
                             writes a default stylesheet, cloc.xsl (or
 | 
						|
                             cloc-diff.xsl if --diff is also given).
 | 
						|
                             This switch forces --xml on.
 | 
						|
   --yaml                    Write the results in YAML.
 | 
						|
 | 
						|
";
 | 
						|
#  Help information for options not yet implemented:
 | 
						|
#  --inline                  Process comments that appear at the end
 | 
						|
#                            of lines containing code.
 | 
						|
#  --html                    Create HTML files of each input file showing
 | 
						|
#                            comment and code lines in different colors.
 | 
						|
 | 
						|
$| = 1;  # flush STDOUT
 | 
						|
my $start_time = get_time();
 | 
						|
my (
 | 
						|
    $opt_categorized          ,
 | 
						|
    $opt_found                ,
 | 
						|
    @opt_force_lang           ,
 | 
						|
    $opt_lang_no_ext          ,
 | 
						|
    @opt_script_lang          ,
 | 
						|
    $opt_count_diff           ,
 | 
						|
    $opt_diff                 ,
 | 
						|
    $opt_diff_alignment       ,
 | 
						|
    $opt_diff_list_file       ,
 | 
						|
    $opt_diff_list_files      ,
 | 
						|
    $opt_diff_timeout         ,
 | 
						|
    $opt_timeout              ,
 | 
						|
    $opt_html                 ,
 | 
						|
    $opt_ignored              ,
 | 
						|
    $opt_counted              ,
 | 
						|
    $opt_show_ext             ,
 | 
						|
    $opt_show_lang            ,
 | 
						|
    $opt_progress_rate        ,
 | 
						|
    $opt_print_filter_stages  ,
 | 
						|
    $opt_v                    ,
 | 
						|
    $opt_vcs                  ,
 | 
						|
    $opt_version              ,
 | 
						|
    $opt_include_content      ,
 | 
						|
    $opt_exclude_content      ,
 | 
						|
    $opt_exclude_lang         ,
 | 
						|
    $opt_exclude_list_file    ,
 | 
						|
    $opt_exclude_dir          ,
 | 
						|
    $opt_explain              ,
 | 
						|
    $opt_include_ext          ,
 | 
						|
    $opt_include_lang         ,
 | 
						|
    $opt_force_lang_def       ,
 | 
						|
    $opt_read_lang_def        ,
 | 
						|
    $opt_write_lang_def       ,
 | 
						|
    $opt_write_lang_def_incl_dup,
 | 
						|
    $opt_strip_code           ,
 | 
						|
    $opt_strip_comments       ,
 | 
						|
    $opt_original_dir         ,
 | 
						|
    $opt_quiet                ,
 | 
						|
    $opt_report_file          ,
 | 
						|
    $opt_sdir                 ,
 | 
						|
    $opt_sum_reports          ,
 | 
						|
    $opt_hide_rate            ,
 | 
						|
    $opt_processes            ,
 | 
						|
    $opt_unicode              ,
 | 
						|
    $opt_no3                  ,   # accept it but don't use it
 | 
						|
    $opt_3                    ,
 | 
						|
    $opt_extract_with         ,
 | 
						|
    $opt_by_file              ,
 | 
						|
    $opt_by_file_by_lang      ,
 | 
						|
    $opt_by_percent           ,
 | 
						|
    $opt_xml                  ,
 | 
						|
    $opt_xsl                  ,
 | 
						|
    $opt_yaml                 ,
 | 
						|
    $opt_csv                  ,
 | 
						|
    $opt_csv_delimiter        ,
 | 
						|
    $opt_fullpath             ,
 | 
						|
    $opt_json                 ,
 | 
						|
    $opt_md                   ,
 | 
						|
    $opt_match_f              ,
 | 
						|
    @opt_not_match_f          ,
 | 
						|
    $opt_match_d              ,
 | 
						|
    @opt_not_match_d          ,
 | 
						|
    $opt_skip_uniqueness      ,
 | 
						|
    $opt_list_file            ,
 | 
						|
    $opt_help                 ,
 | 
						|
    $opt_skip_win_hidden      ,
 | 
						|
    $opt_read_binary_files    ,
 | 
						|
    $opt_sql                  ,
 | 
						|
    $opt_sql_append           ,
 | 
						|
    $opt_sql_project          ,
 | 
						|
    $opt_sql_style            ,
 | 
						|
    $opt_inline               ,
 | 
						|
    $opt_exclude_ext          ,
 | 
						|
    $opt_ignore_whitespace    ,
 | 
						|
    $opt_ignore_case          ,
 | 
						|
    $opt_ignore_case_ext      ,
 | 
						|
    $opt_follow_links         ,
 | 
						|
    $opt_autoconf             ,
 | 
						|
    $opt_sum_one              ,
 | 
						|
    $opt_stdin_name           ,
 | 
						|
    $opt_force_on_windows     ,
 | 
						|
    $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
 | 
						|
    $opt_show_os              ,
 | 
						|
    $opt_skip_archive         ,
 | 
						|
    $opt_max_file_size        ,   # in MB
 | 
						|
    $opt_use_sloccount        ,
 | 
						|
    $opt_no_autogen           ,
 | 
						|
    $opt_force_git            ,
 | 
						|
    $opt_git_diff_rel         ,
 | 
						|
    $opt_git_diff_all         ,
 | 
						|
    $opt_git_diff_simindex    ,
 | 
						|
    $opt_config_file          ,
 | 
						|
    $opt_strip_str_comments   ,
 | 
						|
    $opt_file_encoding        ,
 | 
						|
    $opt_docstring_as_code    ,
 | 
						|
    $opt_stat                 ,
 | 
						|
    $opt_summary_cutoff       ,
 | 
						|
    $opt_skip_leading         ,
 | 
						|
    $opt_no_recurse           ,
 | 
						|
    $opt_only_count_files     ,
 | 
						|
    $opt_fmt                  ,
 | 
						|
   );
 | 
						|
 | 
						|
my $getopt_success = GetOptions(             # {{{1
 | 
						|
   "by_file|by-file"                         => \$opt_by_file             ,
 | 
						|
   "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
 | 
						|
   "categorized=s"                           => \$opt_categorized         ,
 | 
						|
   "counted=s"                               => \$opt_counted             ,
 | 
						|
   "include_ext|include-ext=s"               => \$opt_include_ext         ,
 | 
						|
   "include_lang|include-lang=s"             => \$opt_include_lang        ,
 | 
						|
   "include_content|include-content=s"       => \$opt_include_content     ,
 | 
						|
   "exclude_content|exclude-content=s"       => \$opt_exclude_content     ,
 | 
						|
   "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
 | 
						|
   "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
 | 
						|
   "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
 | 
						|
   "explain=s"                               => \$opt_explain             ,
 | 
						|
   "extract_with|extract-with=s"             => \$opt_extract_with        ,
 | 
						|
   "found=s"                                 => \$opt_found               ,
 | 
						|
   "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
 | 
						|
   "diff"                                    => \$opt_diff                ,
 | 
						|
   "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
 | 
						|
   "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
 | 
						|
   "diff-list-file|diff_list_file=s"         => \$opt_diff_list_file      ,
 | 
						|
   "diff-list-files|diff_list_files"         => \$opt_diff_list_files     ,
 | 
						|
   "timeout=i"                               => \$opt_timeout             ,
 | 
						|
   "html"                                    => \$opt_html                ,
 | 
						|
   "ignored=s"                               => \$opt_ignored             ,
 | 
						|
   "quiet"                                   => \$opt_quiet               ,
 | 
						|
   "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
 | 
						|
   "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
 | 
						|
   "show_ext|show-ext:s"                     => \$opt_show_ext            ,
 | 
						|
   "show_lang|show-lang:s"                   => \$opt_show_lang           ,
 | 
						|
   "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
 | 
						|
   "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
 | 
						|
   "report_file|report-file=s"               => \$opt_report_file         ,
 | 
						|
   "out=s"                                   => \$opt_report_file         ,
 | 
						|
   "script_lang|script-lang=s"               => \@opt_script_lang         ,
 | 
						|
   "sdir=s"                                  => \$opt_sdir                ,
 | 
						|
   "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
 | 
						|
   "strip_code|strip-code=s"                 => \$opt_strip_code          ,
 | 
						|
   "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
 | 
						|
   "original_dir|original-dir"               => \$opt_original_dir        ,
 | 
						|
   "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
 | 
						|
   "hide_rate|hide-rate"                     => \$opt_hide_rate           ,
 | 
						|
   "processes=n"                             => \$opt_processes           ,
 | 
						|
   "unicode"                                 => \$opt_unicode             ,
 | 
						|
   "no3"                                     => \$opt_no3                 ,  # ignored
 | 
						|
   "3"                                       => \$opt_3                   ,
 | 
						|
   "v|verbose:i"                             => \$opt_v                   ,
 | 
						|
   "vcs=s"                                   => \$opt_vcs                 ,
 | 
						|
   "version"                                 => \$opt_version             ,
 | 
						|
   "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
 | 
						|
   "write_lang_def_incl_dup|write-lang-def-incl-dup=s" => \$opt_write_lang_def_incl_dup,
 | 
						|
   "xml"                                     => \$opt_xml                 ,
 | 
						|
   "xsl=s"                                   => \$opt_xsl                 ,
 | 
						|
   "force_lang|force-lang=s"                 => \@opt_force_lang          ,
 | 
						|
   "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
 | 
						|
   "yaml"                                    => \$opt_yaml                ,
 | 
						|
   "csv"                                     => \$opt_csv                 ,
 | 
						|
   "csv_delimiter|csv-delimiter=s"           => \$opt_csv_delimiter       ,
 | 
						|
   "json"                                    => \$opt_json                ,
 | 
						|
   "md"                                      => \$opt_md                  ,
 | 
						|
   "fullpath"                                => \$opt_fullpath            ,
 | 
						|
   "match_f|match-f=s"                       => \$opt_match_f             ,
 | 
						|
   "not_match_f|not-match-f=s"               => \@opt_not_match_f         ,
 | 
						|
   "match_d|match-d=s"                       => \$opt_match_d             ,
 | 
						|
   "not_match_d|not-match-d=s"               => \@opt_not_match_d         ,
 | 
						|
   "list_file|list-file=s"                   => \$opt_list_file           ,
 | 
						|
   "help"                                    => \$opt_help                ,
 | 
						|
   "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
 | 
						|
   "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
 | 
						|
   "sql=s"                                   => \$opt_sql                 ,
 | 
						|
   "sql_project|sql-project=s"               => \$opt_sql_project         ,
 | 
						|
   "sql_append|sql-append"                   => \$opt_sql_append          ,
 | 
						|
   "sql_style|sql-style=s"                   => \$opt_sql_style           ,
 | 
						|
   "inline"                                  => \$opt_inline              ,
 | 
						|
   "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
 | 
						|
   "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
 | 
						|
   "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
 | 
						|
   "ignore_case_ext|ignore-case-ext"         => \$opt_ignore_case_ext     ,
 | 
						|
   "follow_links|follow-links"               => \$opt_follow_links        ,
 | 
						|
   "autoconf"                                => \$opt_autoconf            ,
 | 
						|
   "sum_one|sum-one"                         => \$opt_sum_one             ,
 | 
						|
   "by_percent|by-percent=s"                 => \$opt_by_percent          ,
 | 
						|
   "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
 | 
						|
   "windows"                                 => \$opt_force_on_windows    ,
 | 
						|
   "unix"                                    => \$opt_force_on_unix       ,
 | 
						|
   "show_os|show-os"                         => \$opt_show_os             ,
 | 
						|
   "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
 | 
						|
   "max_file_size|max-file-size=f"           => \$opt_max_file_size       ,
 | 
						|
   "use_sloccount|use-sloccount"             => \$opt_use_sloccount       ,
 | 
						|
   "no_autogen|no-autogen"                   => \$opt_no_autogen          ,
 | 
						|
   "git"                                     => \$opt_force_git           ,
 | 
						|
   "git_diff_rel|git-diff-rel"               => \$opt_git_diff_rel        ,
 | 
						|
   "git_diff_all|git-diff-all"               => \$opt_git_diff_all        ,
 | 
						|
#  "git_diff_simindex|git-diff-simindex"     => \$opt_git_diff_simindex   ,
 | 
						|
   "config=s"                                => \$opt_config_file         ,
 | 
						|
   "strip_str_comments|strip-str-comments"   => \$opt_strip_str_comments  ,
 | 
						|
   "file_encoding|file-encoding=s"           => \$opt_file_encoding       ,
 | 
						|
   "docstring_as_code|docstring-as-code"     => \$opt_docstring_as_code   ,
 | 
						|
   "stat"                                    => \$opt_stat                ,
 | 
						|
   "summary_cutoff|summary-cutoff=s"         => \$opt_summary_cutoff      ,
 | 
						|
   "skip_leading|skip-leading:s"             => \$opt_skip_leading        ,
 | 
						|
   "no_recurse|no-recurse"                   => \$opt_no_recurse          ,
 | 
						|
   "only_count_files|only-count-files"       => \$opt_only_count_files    ,
 | 
						|
   "fmt=i"                                   => \$opt_fmt                 ,
 | 
						|
  );
 | 
						|
# 1}}}
 | 
						|
$config_file = $opt_config_file if defined $opt_config_file;
 | 
						|
load_from_config_file($config_file,          # {{{2
 | 
						|
                                                \$opt_by_file             ,
 | 
						|
                                                \$opt_by_file_by_lang     ,
 | 
						|
                                                \$opt_categorized         ,
 | 
						|
                                                \$opt_counted             ,
 | 
						|
                                                \$opt_include_ext         ,
 | 
						|
                                                \$opt_include_lang        ,
 | 
						|
                                                \$opt_include_content     ,
 | 
						|
                                                \$opt_exclude_content     ,
 | 
						|
                                                \$opt_exclude_lang        ,
 | 
						|
                                                \$opt_exclude_dir         ,
 | 
						|
                                                \$opt_exclude_list_file   ,
 | 
						|
                                                \$opt_explain             ,
 | 
						|
                                                \$opt_extract_with        ,
 | 
						|
                                                \$opt_found               ,
 | 
						|
                                                \$opt_count_diff          ,
 | 
						|
                                                \$opt_diff_list_files     ,
 | 
						|
                                                \$opt_diff                ,
 | 
						|
                                                \$opt_diff_alignment      ,
 | 
						|
                                                \$opt_diff_timeout        ,
 | 
						|
                                                \$opt_timeout             ,
 | 
						|
                                                \$opt_html                ,
 | 
						|
                                                \$opt_ignored             ,
 | 
						|
                                                \$opt_quiet               ,
 | 
						|
                                                \$opt_force_lang_def      ,
 | 
						|
                                                \$opt_read_lang_def       ,
 | 
						|
                                                \$opt_show_ext            ,
 | 
						|
                                                \$opt_show_lang           ,
 | 
						|
                                                \$opt_progress_rate       ,
 | 
						|
                                                \$opt_print_filter_stages ,
 | 
						|
                                                \$opt_report_file         ,
 | 
						|
                                                \@opt_script_lang         ,
 | 
						|
                                                \$opt_sdir                ,
 | 
						|
                                                \$opt_skip_uniqueness     ,
 | 
						|
                                                \$opt_strip_code          ,
 | 
						|
                                                \$opt_strip_comments      ,
 | 
						|
                                                \$opt_original_dir        ,
 | 
						|
                                                \$opt_sum_reports         ,
 | 
						|
                                                \$opt_hide_rate           ,
 | 
						|
                                                \$opt_processes           ,
 | 
						|
                                                \$opt_unicode             ,
 | 
						|
                                                \$opt_3                   ,
 | 
						|
                                                \$opt_v                   ,
 | 
						|
                                                \$opt_vcs                 ,
 | 
						|
                                                \$opt_version             ,
 | 
						|
                                                \$opt_write_lang_def      ,
 | 
						|
                                                \$opt_write_lang_def_incl_dup,
 | 
						|
                                                \$opt_xml                 ,
 | 
						|
                                                \$opt_xsl                 ,
 | 
						|
                                                \@opt_force_lang          ,
 | 
						|
                                                \$opt_lang_no_ext         ,
 | 
						|
                                                \$opt_yaml                ,
 | 
						|
                                                \$opt_csv                 ,
 | 
						|
                                                \$opt_csv_delimiter       ,
 | 
						|
                                                \$opt_json                ,
 | 
						|
                                                \$opt_md                  ,
 | 
						|
                                                \$opt_fullpath            ,
 | 
						|
                                                \$opt_match_f             ,
 | 
						|
                                                \@opt_not_match_f         ,
 | 
						|
                                                \$opt_match_d             ,
 | 
						|
                                                \@opt_not_match_d         ,
 | 
						|
                                                \$opt_list_file           ,
 | 
						|
                                                \$opt_help                ,
 | 
						|
                                                \$opt_skip_win_hidden     ,
 | 
						|
                                                \$opt_read_binary_files   ,
 | 
						|
                                                \$opt_sql                 ,
 | 
						|
                                                \$opt_sql_project         ,
 | 
						|
                                                \$opt_sql_append          ,
 | 
						|
                                                \$opt_sql_style           ,
 | 
						|
                                                \$opt_inline              ,
 | 
						|
                                                \$opt_exclude_ext         ,
 | 
						|
                                                \$opt_ignore_whitespace   ,
 | 
						|
                                                \$opt_ignore_case         ,
 | 
						|
                                                \$opt_ignore_case_ext     ,
 | 
						|
                                                \$opt_follow_links        ,
 | 
						|
                                                \$opt_autoconf            ,
 | 
						|
                                                \$opt_sum_one             ,
 | 
						|
                                                \$opt_by_percent          ,
 | 
						|
                                                \$opt_stdin_name          ,
 | 
						|
                                                \$opt_force_on_windows    ,
 | 
						|
                                                \$opt_force_on_unix       ,
 | 
						|
                                                \$opt_show_os             ,
 | 
						|
                                                \$opt_skip_archive        ,
 | 
						|
                                                \$opt_max_file_size       ,
 | 
						|
                                                \$opt_use_sloccount       ,
 | 
						|
                                                \$opt_no_autogen          ,
 | 
						|
                                                \$opt_force_git           ,
 | 
						|
                                                \$opt_strip_str_comments  ,
 | 
						|
                                                \$opt_file_encoding       ,
 | 
						|
                                                \$opt_docstring_as_code   ,
 | 
						|
                                                \$opt_stat                ,
 | 
						|
);  # 2}}} Not pretty.  Not at all.
 | 
						|
if ($opt_version) {
 | 
						|
    printf "$VERSION\n";
 | 
						|
    exit;
 | 
						|
}
 | 
						|
my $opt_git = 0;
 | 
						|
$opt_git = 1 if defined($opt_git_diff_all) or
 | 
						|
                defined($opt_git_diff_rel) or
 | 
						|
                (defined($opt_vcs) and ($opt_vcs eq "git"));
 | 
						|
$opt_by_file  = 1 if defined  $opt_by_file_by_lang;
 | 
						|
$opt_fmt = 0 unless defined $opt_fmt;
 | 
						|
if ($opt_fmt) {
 | 
						|
    $opt_by_file = 1;
 | 
						|
    $opt_json = 1;
 | 
						|
}
 | 
						|
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
 | 
						|
   $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
 | 
						|
die "\n" unless $getopt_success;
 | 
						|
print $usage and exit if $opt_help;
 | 
						|
my %Exclude_Language = ();
 | 
						|
   %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
 | 
						|
        if $opt_exclude_lang;
 | 
						|
my %Exclude_Dir      = ();
 | 
						|
   %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir )
 | 
						|
        if $opt_exclude_dir ;
 | 
						|
die unless exclude_dir_validates(\%Exclude_Dir);
 | 
						|
my %Include_Ext = ();
 | 
						|
   %Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
 | 
						|
        if $opt_include_ext;
 | 
						|
my %Include_Language = (); # keys are lower case language names
 | 
						|
   %Include_Language = map { lc($_) => 1 } split(/,/, $opt_include_lang)
 | 
						|
        if $opt_include_lang;
 | 
						|
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of these
 | 
						|
# directories often conflict with files of interest.
 | 
						|
$opt_exclude_dir       = 1;
 | 
						|
$Exclude_Dir{".svn"}   = 1;
 | 
						|
$Exclude_Dir{".cvs"}   = 1;
 | 
						|
$Exclude_Dir{".hg"}    = 1;
 | 
						|
$Exclude_Dir{".git"}   = 1;
 | 
						|
$Exclude_Dir{".bzr"}   = 1;
 | 
						|
$Exclude_Dir{".snapshot"} = 1;  # NetApp backups
 | 
						|
$Exclude_Dir{".config"} = 1;
 | 
						|
$opt_count_diff        = defined $opt_count_diff ? 1 : 0;
 | 
						|
$opt_diff              = 1  if $opt_diff_alignment    or
 | 
						|
                               $opt_diff_list_file    or
 | 
						|
                               $opt_diff_list_files   or
 | 
						|
                               $opt_git_diff_rel      or
 | 
						|
                               $opt_git_diff_all      or
 | 
						|
                               $opt_git_diff_simindex;
 | 
						|
$opt_force_git         = 1  if $opt_git_diff_rel      or
 | 
						|
                               $opt_git_diff_all      or
 | 
						|
                               $opt_git_diff_simindex;
 | 
						|
$opt_diff_alignment    = 0  if $opt_diff_list_file;
 | 
						|
$opt_exclude_ext       = "" unless $opt_exclude_ext;
 | 
						|
$opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
 | 
						|
$opt_ignore_case       = 0  unless $opt_ignore_case;
 | 
						|
$opt_ignore_case_ext   = 0  unless $opt_ignore_case_ext;
 | 
						|
$opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
 | 
						|
$opt_follow_links      = 0  unless $opt_follow_links;
 | 
						|
if (defined $opt_diff_timeout) {
 | 
						|
    # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
 | 
						|
    $opt_diff_timeout = 2**31-1 unless $opt_diff_timeout > 0;
 | 
						|
} else {
 | 
						|
    $opt_diff_timeout  =10; # seconds
 | 
						|
}
 | 
						|
if (defined $opt_timeout) {
 | 
						|
    # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
 | 
						|
    $opt_timeout = 2**31-1 unless $opt_timeout > 0;
 | 
						|
    # else is computed dynamically, ref $max_duration_sec
 | 
						|
}
 | 
						|
$opt_csv               = 0  unless defined $opt_csv;
 | 
						|
$opt_csv               = 1  if $opt_csv_delimiter;
 | 
						|
$ON_WINDOWS            = 1  if $opt_force_on_windows;
 | 
						|
$ON_WINDOWS            = 0  if $opt_force_on_unix;
 | 
						|
$opt_max_file_size     = 100 unless $opt_max_file_size;
 | 
						|
my $HAVE_SLOCCOUNT_c_count = 0;
 | 
						|
if (!$ON_WINDOWS and $opt_use_sloccount) {
 | 
						|
    # Only bother doing this kludgey test is user explicitly wants
 | 
						|
    # to use SLOCCount.  Debian based systems will hang if just doing
 | 
						|
    #  external_utility_exists("c_count")
 | 
						|
    # if c_count is in $PATH; c_count expects to have input.
 | 
						|
    $HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
 | 
						|
}
 | 
						|
if ($opt_use_sloccount) {
 | 
						|
    if (!$HAVE_SLOCCOUNT_c_count) {
 | 
						|
        warn "c_count could not be found; ignoring --use-sloccount\n";
 | 
						|
        $opt_use_sloccount = 0;
 | 
						|
    } else {
 | 
						|
        warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
 | 
						|
        warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
 | 
						|
        warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
 | 
						|
        warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
 | 
						|
        warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
 | 
						|
        warn "--strip-code is disabled with --use-sloccount\n" if $opt_strip_code;
 | 
						|
        $opt_diff           = 0;
 | 
						|
        $opt_count_diff     = undef;
 | 
						|
        $opt_unicode        = 0;
 | 
						|
        $opt_strip_comments = 0;
 | 
						|
        $opt_strip_code     = 0;
 | 
						|
    }
 | 
						|
}
 | 
						|
die "--strip-comments and --strip-code are mutually exclusive\n" if
 | 
						|
    $opt_strip_comments and $opt_strip_code;
 | 
						|
$opt_vcs = 0 if $opt_force_git;
 | 
						|
 | 
						|
# replace Windows path separators with /
 | 
						|
if ($ON_WINDOWS) {
 | 
						|
    map { s{\\}{/}g } @ARGV;
 | 
						|
    if ($opt_git) {
 | 
						|
        # PowerShell tab expansion automatically prefixes local directories
 | 
						|
        # with ".\" (now mapped to "./").   git ls-files output does not
 | 
						|
        # include this.  Strip this prefix to permit clean matches.
 | 
						|
        map { s{^\./}{} } @ARGV;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
my @COUNT_DIFF_ARGV        = undef;
 | 
						|
my $COUNT_DIFF_report_file = undef;
 | 
						|
if ($opt_count_diff and !$opt_diff_list_file) {
 | 
						|
    die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
 | 
						|
        if scalar @ARGV != 2;
 | 
						|
    # prefix with a dummy term so that $opt_count_diff is the
 | 
						|
    # index into @COUNT_DIFF_ARGV to work on at each pass
 | 
						|
    @COUNT_DIFF_ARGV = (undef, $ARGV[0],
 | 
						|
                               $ARGV[1],
 | 
						|
                              [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
 | 
						|
    $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
 | 
						|
}
 | 
						|
 | 
						|
# Options defaults:
 | 
						|
$opt_quiet         =   1 if ($opt_md or $opt_json or !(-t STDOUT))
 | 
						|
                            and !defined $opt_report_file;
 | 
						|
$opt_progress_rate = 100 unless defined $opt_progress_rate;
 | 
						|
$opt_progress_rate =   0 if     defined $opt_quiet;
 | 
						|
if (!defined $opt_v) {
 | 
						|
    $opt_v  = 0;
 | 
						|
} elsif (!$opt_v) {
 | 
						|
    $opt_v  = 1;
 | 
						|
}
 | 
						|
if (defined $opt_xsl) {
 | 
						|
    $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
 | 
						|
    $opt_xml = 1;
 | 
						|
}
 | 
						|
my $skip_generate_report = 0;
 | 
						|
$opt_sql_style = 0 unless defined $opt_sql_style;
 | 
						|
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
 | 
						|
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
 | 
						|
    $opt_quiet            = 1;
 | 
						|
    $skip_generate_report = 1;
 | 
						|
    $opt_by_file          = 1;
 | 
						|
    $opt_sum_reports      = 0;
 | 
						|
    $opt_progress_rate    = 0;
 | 
						|
} elsif ($opt_sql)  { # write SQL output to a file
 | 
						|
    $opt_by_file          = 1;
 | 
						|
    $skip_generate_report = 1;
 | 
						|
    $opt_sum_reports      = 0;
 | 
						|
}
 | 
						|
if ($opt_sql_style) {
 | 
						|
    $opt_sql_style = lc $opt_sql_style;
 | 
						|
    if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle Named_Columns )) {
 | 
						|
        die "'$opt_sql_style' is not a recognized SQL style.\n";
 | 
						|
    }
 | 
						|
}
 | 
						|
$opt_by_percent = '' unless defined $opt_by_percent;
 | 
						|
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
 | 
						|
    die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
 | 
						|
}
 | 
						|
$opt_by_percent = lc $opt_by_percent;
 | 
						|
 | 
						|
if (defined $opt_vcs) {
 | 
						|
    if ($opt_vcs eq "auto") {
 | 
						|
        if      (is_dir(".git")) {
 | 
						|
            $opt_vcs = "git";
 | 
						|
        } elsif (is_dir(".svn")) {
 | 
						|
            $opt_vcs = "svn";
 | 
						|
        } else {
 | 
						|
            warn "--vcs auto:  unable to determine versioning system\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if      ($opt_vcs eq "git") {
 | 
						|
        $opt_vcs = "git ls-files";
 | 
						|
        my @submodules = invoke_generator('git submodule status', \@ARGV);
 | 
						|
        foreach my $SM (@submodules) {
 | 
						|
            $SM =~ s/^\s+//;        # may have leading space
 | 
						|
            $SM =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
 | 
						|
            my ($checksum, $dir) = split(' ', $SM, 2);
 | 
						|
            $dir =~ s/\s+$//;
 | 
						|
            $Exclude_Dir{$dir} = 1;
 | 
						|
        }
 | 
						|
    } elsif ($opt_vcs eq "svn") {
 | 
						|
        $opt_vcs = "svn list -R";
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
my $list_no_autogen = 0;
 | 
						|
if (defined $opt_no_autogen and scalar @ARGV == 1 and $ARGV[0] eq "list") {
 | 
						|
    $list_no_autogen = 1;
 | 
						|
}
 | 
						|
if ($opt_summary_cutoff) {
 | 
						|
    my $error = summary_cutoff_error($opt_summary_cutoff);
 | 
						|
    die "$error\n" if $error;
 | 
						|
}
 | 
						|
 | 
						|
if (!$opt_config_file) {
 | 
						|
    # if not explicitly given, look for a config file in other
 | 
						|
    # possible locations
 | 
						|
    my $other_loc = check_alternate_config_files($opt_list_file,
 | 
						|
        $opt_exclude_list_file, $opt_read_lang_def, $opt_force_lang_def,
 | 
						|
        $opt_diff_list_file);
 | 
						|
    $opt_config_file = $other_loc if $other_loc;
 | 
						|
}
 | 
						|
 | 
						|
# --match-d and --not-match-d: if end with a trailing slash, update the
 | 
						|
# regex to be either slash or end of line since File::Find::find() will
 | 
						|
# not see the trailing slash in leaf directories (#732, #833).
 | 
						|
if ($opt_match_d and $opt_match_d =~ m{/$}) {
 | 
						|
    $opt_match_d =~ s{/$}{(/|\$)};
 | 
						|
}
 | 
						|
foreach my $nmd (@opt_not_match_d) {
 | 
						|
    if ($nmd =~ m{/$}) {
 | 
						|
        $nmd =~ s{/$}{(/|\$)};
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
die $brief_usage unless defined $opt_version         or
 | 
						|
                        defined $opt_show_lang       or
 | 
						|
                        defined $opt_show_ext        or
 | 
						|
                        defined $opt_show_os         or
 | 
						|
                        defined $opt_write_lang_def  or
 | 
						|
                        defined $opt_write_lang_def_incl_dup  or
 | 
						|
                        defined $opt_list_file       or
 | 
						|
                        defined $opt_diff_list_file  or
 | 
						|
                        defined $opt_vcs             or
 | 
						|
                        defined $opt_xsl             or
 | 
						|
                        defined $opt_explain         or
 | 
						|
                        $list_no_autogen             or
 | 
						|
                        scalar @ARGV >= 1;
 | 
						|
if (!$opt_diff_list_file) {
 | 
						|
    die "--diff requires two arguments; got ", scalar @ARGV, "\n"
 | 
						|
        if $opt_diff and !$opt_sum_reports and scalar @ARGV != 2;
 | 
						|
    die "--diff arguments are identical; nothing done", "\n"
 | 
						|
        if $opt_diff and !$opt_sum_reports and scalar @ARGV == 2
 | 
						|
                                           and $ARGV[0] eq $ARGV[1];
 | 
						|
}
 | 
						|
trick_pp_packer_encode() if $ON_WINDOWS and $opt_file_encoding;
 | 
						|
$File::Find::dont_use_nlink = 1 if $opt_stat or top_level_SMB_dir(\@ARGV);
 | 
						|
my @git_similarity = (); # only populated with --git-diff-simindex
 | 
						|
my %git_metadata   = ();
 | 
						|
get_git_metadata(\@ARGV, \%git_metadata) if $opt_force_git;
 | 
						|
#use Data::Dumper;
 | 
						|
#print Dumper(\%git_metadata);
 | 
						|
replace_git_hash_with_tarfile(\@ARGV, \@git_similarity);
 | 
						|
# 1}}}
 | 
						|
# Step 1:  Initialize global constants.        {{{1
 | 
						|
#
 | 
						|
my $nFiles_Found = 0;  # updated in make_file_list
 | 
						|
my (%Language_by_Extension, %Language_by_Script,
 | 
						|
    %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
 | 
						|
    %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
 | 
						|
    %Language_by_Prefix, %EOL_Continuation_re,
 | 
						|
   );
 | 
						|
my $ALREADY_SHOWED_HEADER = 0;
 | 
						|
my $ALREADY_SHOWED_XML_SECTION = 0;
 | 
						|
my %Error_Codes = ( 'Unable to read'                => -1,
 | 
						|
                    'Neither file nor directory'    => -2,
 | 
						|
                    'Diff error (quoted comments?)' => -3,
 | 
						|
                    'Diff error, exceeded timeout'  => -4,
 | 
						|
                    'Line count, exceeded timeout'  => -5,
 | 
						|
                  );
 | 
						|
my %Extension_Collision = (
 | 
						|
    'ADSO/IDSM'                                     => [ 'adso' ] ,
 | 
						|
    'C#/Smalltalk'                                  => [ 'cs'   ] ,
 | 
						|
    'D/dtrace'                                      => [ 'd'    ] ,
 | 
						|
    'F#/Forth'                                      => [ 'fs'   ] ,
 | 
						|
    'Fortran 77/Forth'                              => [ 'f', 'for' ] ,
 | 
						|
    'IDL/Qt Project/Prolog/ProGuard'                => [ 'pro'  ] ,
 | 
						|
    'Lisp/Julia'                                    => [ 'jl'   ] ,
 | 
						|
    'Lisp/OpenCL'                                   => [ 'cl'   ] ,
 | 
						|
    'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'  => [ 'm'    ] ,
 | 
						|
    'Pascal/Puppet'                                 => [ 'pp'   ] ,
 | 
						|
    'Perl/Prolog'                                   => [ 'pl', 'PL'  ] ,
 | 
						|
    'PHP/Pascal/Fortran'                            => [ 'inc'  ] ,
 | 
						|
    'Raku/Prolog'                                   => [ 'p6', 'P6'  ] ,
 | 
						|
    'Qt/Glade'                                      => [ 'ui'   ] ,
 | 
						|
    'TypeScript/Qt Linguist'                        => [ 'ts'   ] ,
 | 
						|
    'Verilog-SystemVerilog/Coq'                     => [ 'v'    ] ,
 | 
						|
    'Visual Basic/TeX/Apex Class'                   => [ 'cls'  ] ,
 | 
						|
    'Scheme/SaltStack'                              => [ 'sls'  ] ,
 | 
						|
);
 | 
						|
my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
 | 
						|
if ($opt_force_lang_def) {
 | 
						|
    # replace cloc's definitions
 | 
						|
    read_lang_def(
 | 
						|
        $opt_force_lang_def    , #        Sample values:
 | 
						|
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | 
						|
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | 
						|
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | 
						|
                                 #      [ 'remove_matches' , '^\s*#'  ]
 | 
						|
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | 
						|
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | 
						|
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | 
						|
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | 
						|
        );
 | 
						|
} else {
 | 
						|
    set_constants(               #
 | 
						|
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | 
						|
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | 
						|
        \%Language_by_Prefix   , # Language_by_Prefix{Dockerfile}  = 'Dockerfile'
 | 
						|
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | 
						|
                                 #      [ 'remove_matches' , '^\s*#'  ]
 | 
						|
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | 
						|
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | 
						|
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | 
						|
        \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
 | 
						|
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | 
						|
        );
 | 
						|
        if ($opt_no_autogen) {
 | 
						|
            foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
 | 
						|
        }
 | 
						|
}
 | 
						|
if ($opt_read_lang_def) {
 | 
						|
    # augment cloc's definitions (keep cloc's where there are overlaps)
 | 
						|
    merge_lang_def(
 | 
						|
        $opt_read_lang_def     , #        Sample values:
 | 
						|
        \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
        \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | 
						|
        \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | 
						|
        \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | 
						|
                                 #      [ 'remove_matches' , '^\s*#'  ]
 | 
						|
        \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | 
						|
        \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | 
						|
        \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | 
						|
        \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | 
						|
        );
 | 
						|
}
 | 
						|
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
 | 
						|
    die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
 | 
						|
}
 | 
						|
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
 | 
						|
                      \%Scale_Factor);
 | 
						|
 | 
						|
my $nCounted = 0;
 | 
						|
 | 
						|
# Process command line provided extension-to-language mapping overrides.
 | 
						|
# Make a hash of known languages in lower case for easier matching.
 | 
						|
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
 | 
						|
foreach my $language (keys %Filters_by_Language) {
 | 
						|
    my $lang_lc = lc $language;
 | 
						|
    $Recognized_Language_lc{$lang_lc} = $language;
 | 
						|
}
 | 
						|
my %Forced_Extension = (); # file name extensions which user wants to count
 | 
						|
my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
 | 
						|
foreach my $pair (@opt_force_lang) {
 | 
						|
    my ($lang, $extension) = split(',', $pair);
 | 
						|
    my $lang_lc = lc $lang;
 | 
						|
    if (defined $extension) {
 | 
						|
        $Forced_Extension{$extension} = $lang;
 | 
						|
 | 
						|
        die_unknown_lang($lang, "--force-lang")
 | 
						|
            unless $Recognized_Language_lc{$lang_lc};
 | 
						|
 | 
						|
        $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
 | 
						|
    } else {
 | 
						|
        # the scary case--count everything as this language
 | 
						|
        $All_One_Language = $Recognized_Language_lc{$lang_lc};
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
foreach my $pair (@opt_script_lang) {
 | 
						|
    my ($lang, $script_name) = split(',', $pair);
 | 
						|
    my $lang_lc = lc $lang;
 | 
						|
    if (!defined $script_name) {
 | 
						|
        die "The --script-lang option requires a comma separated pair of ".
 | 
						|
            "strings.\n";
 | 
						|
    }
 | 
						|
 | 
						|
    die_unknown_lang($lang, "--script-lang")
 | 
						|
        unless $Recognized_Language_lc{$lang_lc};
 | 
						|
 | 
						|
    $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
 | 
						|
}
 | 
						|
 | 
						|
# If user provided a language definition file, make sure those
 | 
						|
# extensions aren't rejected.
 | 
						|
foreach my $ext (%Language_by_Extension) {
 | 
						|
    next unless defined $Not_Code_Extension{$ext};
 | 
						|
    delete $Not_Code_Extension{$ext};
 | 
						|
}
 | 
						|
 | 
						|
# If user provided file extensions to ignore, add these to
 | 
						|
# the exclusion list.
 | 
						|
foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
 | 
						|
    $ext = lc $ext if $ON_WINDOWS or $opt_ignore_case_ext;
 | 
						|
    $Not_Code_Extension{$ext} = 1;
 | 
						|
}
 | 
						|
 | 
						|
# If SQL or --by-file output is requested, keep track of directory names
 | 
						|
# generated by File::Temp::tempdir and used to temporarily hold the results
 | 
						|
# of compressed archives.  Contents of the SQL table 't' will be much
 | 
						|
# cleaner if these meaningless directory names are stripped from the front
 | 
						|
# of files pulled from the archives.
 | 
						|
my %TEMP_DIR = ();
 | 
						|
my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
 | 
						|
                    # scratch directories made in this run to avoid
 | 
						|
                    # file overwrites by multiple extractions to same
 | 
						|
                    # sdir.
 | 
						|
# Also track locations where temporary installations, if necessary, of
 | 
						|
# Algorithm::Diff and/or Regexp::Common are done.  Make sure these
 | 
						|
# directories are not counted as inputs (ref bug #80 2012-11-23).
 | 
						|
my %TEMP_INST = ();
 | 
						|
 | 
						|
# invert %Language_by_Script hash to get an easy-to-look-up list of known
 | 
						|
# scripting languages
 | 
						|
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
 | 
						|
# 1}}}
 | 
						|
# Step 2:  Early exits for display, summation. {{{1
 | 
						|
#
 | 
						|
print_extension_info(   $opt_show_ext     ) if defined $opt_show_ext ;
 | 
						|
print_language_info(    $opt_show_lang, '') if defined $opt_show_lang;
 | 
						|
print_language_filters( $opt_explain      ) if defined $opt_explain  ;
 | 
						|
exit if (defined $opt_show_ext)  or
 | 
						|
        (defined $opt_show_lang) or
 | 
						|
        (defined $opt_explain)   or
 | 
						|
        $list_no_autogen;
 | 
						|
 | 
						|
Top_of_Processing_Loop:
 | 
						|
# Sorry, coding purists.  Using a goto to implement --count-and-diff
 | 
						|
# which has to do three passes over the main code, starting with
 | 
						|
# a clean slate each time.
 | 
						|
if ($opt_count_diff) {
 | 
						|
    @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
 | 
						|
    if ($opt_count_diff == 3) {
 | 
						|
        $opt_diff = 1;
 | 
						|
        @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
 | 
						|
    } elsif ($opt_diff_list_files) {
 | 
						|
        $opt_diff = 0;
 | 
						|
    }
 | 
						|
    if ($opt_report_file) {
 | 
						|
        # Instead of just one output file, will have three.
 | 
						|
        # Keep their names unique otherwise results are clobbered.
 | 
						|
        # Replace file path separators with underscores otherwise
 | 
						|
        # may end up with illegal file names.
 | 
						|
        my ($fn_0, $fn_1) = (undef, undef);
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            ($fn_0 = $ARGV[0]) =~ s{\\}{_}g;
 | 
						|
             $fn_0 =~ s{:}{_}g;
 | 
						|
             $fn_0 =~ s{/}{_}g;
 | 
						|
            ($fn_1 = $ARGV[1]) =~ s{\\}{_}g if defined $ARGV[1];
 | 
						|
             $fn_1 =~ s{:}{_}g              if defined $ARGV[1];
 | 
						|
             $fn_1 =~ s{/}{_}g              if defined $ARGV[1];
 | 
						|
        } else {
 | 
						|
            ($fn_0 = $ARGV[0]) =~ s{/}{_}g;
 | 
						|
            ($fn_1 = $ARGV[1]) =~ s{/}{_}g  if defined $ARGV[1];
 | 
						|
        }
 | 
						|
 | 
						|
        if      ($opt_count_diff == 3) {
 | 
						|
            $opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
 | 
						|
        } else {
 | 
						|
            $opt_report_file = $COUNT_DIFF_report_file . ".$fn_0";
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        # STDOUT; print a header showing what it's working on
 | 
						|
        if ($opt_count_diff == 3) {
 | 
						|
            print "\ndiff $ARGV[0] $ARGV[1]::\n";
 | 
						|
        } else {
 | 
						|
            print "\n" if $opt_count_diff > 1;
 | 
						|
            print "$ARGV[0]::\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $ALREADY_SHOWED_HEADER      = 0;
 | 
						|
    $ALREADY_SHOWED_XML_SECTION = 0;
 | 
						|
}
 | 
						|
 | 
						|
#print "Before glob have [", join(",", @ARGV), "]\n";
 | 
						|
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
 | 
						|
#print "after  glob have [", join(",", @ARGV), "]\n";
 | 
						|
 | 
						|
# filter out archive files if requested to do so
 | 
						|
if (defined $opt_skip_archive) {
 | 
						|
    my @non_archive = ();
 | 
						|
    foreach my $candidate (@ARGV) {
 | 
						|
        if ($candidate !~ m/${opt_skip_archive}$/) {
 | 
						|
            push @non_archive, $candidate;
 | 
						|
 | 
						|
        }
 | 
						|
    }
 | 
						|
    @ARGV = @non_archive;
 | 
						|
}
 | 
						|
 | 
						|
if ($opt_sum_reports and $opt_diff) {
 | 
						|
    my @results = ();
 | 
						|
    if ($opt_csv and !defined($opt_csv_delimiter)) {
 | 
						|
        $opt_csv_delimiter = ",";
 | 
						|
    }
 | 
						|
    if ($opt_list_file) { # read inputs from the list file
 | 
						|
        my @list = read_list_file($opt_list_file);
 | 
						|
        if ($opt_csv) {
 | 
						|
            @results = combine_csv_diffs($opt_csv_delimiter, \@list);
 | 
						|
        } else {
 | 
						|
            @results = combine_diffs(\@list);
 | 
						|
        }
 | 
						|
    } elsif ($opt_vcs) { # read inputs from the VCS generator
 | 
						|
        my @list = invoke_generator($opt_vcs, \@ARGV);
 | 
						|
        if ($opt_csv) {
 | 
						|
            @results = combine_csv_diffs($opt_csv_delimiter, \@list);
 | 
						|
        } else {
 | 
						|
            @results = combine_diffs(\@list);
 | 
						|
        }
 | 
						|
    } else { # get inputs from the command line
 | 
						|
        if ($opt_csv) {
 | 
						|
            @results = combine_csv_diffs($opt_csv_delimiter, \@ARGV);
 | 
						|
        } else {
 | 
						|
            @results = combine_diffs(\@ARGV);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($opt_report_file) {
 | 
						|
        write_file($opt_report_file, {}, @results);
 | 
						|
    } else {
 | 
						|
        print "\n", join("\n", @results), "\n";
 | 
						|
    }
 | 
						|
    exit;
 | 
						|
}
 | 
						|
if ($opt_sum_reports) {
 | 
						|
    my %Results = ();
 | 
						|
    foreach my $type( "by language", "by report file" ) {
 | 
						|
        my $found_lang = undef;
 | 
						|
        if ($opt_list_file or $opt_vcs) {
 | 
						|
            # read inputs from the list file
 | 
						|
            my @list;
 | 
						|
            if ($opt_vcs) {
 | 
						|
                @list = invoke_generator($opt_vcs, \@ARGV);
 | 
						|
            } else {
 | 
						|
                @list = read_list_file($opt_list_file);
 | 
						|
            }
 | 
						|
            $found_lang = combine_results(\@list,
 | 
						|
                                           $type,
 | 
						|
                                          \%{$Results{ $type }},
 | 
						|
                                          \%Filters_by_Language );
 | 
						|
        } else { # get inputs from the command line
 | 
						|
            $found_lang = combine_results(\@ARGV,
 | 
						|
                                           $type,
 | 
						|
                                          \%{$Results{ $type }},
 | 
						|
                                          \%Filters_by_Language );
 | 
						|
        }
 | 
						|
        next unless %Results;
 | 
						|
        my $end_time = get_time();
 | 
						|
        my @results  = generate_report($VERSION, $end_time - $start_time,
 | 
						|
                                       $type,
 | 
						|
                                      \%{$Results{ $type }}, \%Scale_Factor);
 | 
						|
        if ($opt_report_file) {
 | 
						|
            my $ext  = ".lang";
 | 
						|
               $ext  = ".file" unless $type eq "by language";
 | 
						|
            next if !$found_lang and  $ext  eq ".lang";
 | 
						|
            write_file($opt_report_file . $ext, {}, @results);
 | 
						|
        } else {
 | 
						|
            print "\n", join("\n", @results), "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    exit;
 | 
						|
}
 | 
						|
if ($opt_write_lang_def or $opt_write_lang_def_incl_dup) {
 | 
						|
    my $file = $opt_write_lang_def          if $opt_write_lang_def;
 | 
						|
       $file = $opt_write_lang_def_incl_dup if $opt_write_lang_def_incl_dup;
 | 
						|
    write_lang_def($file                 ,
 | 
						|
                  \%Language_by_Extension,
 | 
						|
                  \%Language_by_Script   ,
 | 
						|
                  \%Language_by_File     ,
 | 
						|
                  \%Filters_by_Language  ,
 | 
						|
                  \%Not_Code_Extension   ,
 | 
						|
                  \%Not_Code_Filename    ,
 | 
						|
                  \%Scale_Factor         ,
 | 
						|
                  \%EOL_Continuation_re  ,
 | 
						|
                  );
 | 
						|
    exit;
 | 
						|
}
 | 
						|
if ($opt_show_os) {
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
        print "Windows\n";
 | 
						|
    } else {
 | 
						|
        print "UNIX\n";
 | 
						|
    }
 | 
						|
    exit;
 | 
						|
}
 | 
						|
 | 
						|
my $max_processes = get_max_processes();
 | 
						|
 | 
						|
# 1}}}
 | 
						|
# Step 3:  Create a list of files to consider. {{{1
 | 
						|
#  a) If inputs are binary archives, first cd to a temp
 | 
						|
#     directory, expand the archive with the user-given
 | 
						|
#     extraction tool, then add the temp directory to
 | 
						|
#     the list of dirs to process.
 | 
						|
#  b) Create a list of every file that might contain source
 | 
						|
#     code.  Ignore binary files, zero-sized files, and
 | 
						|
#     any file in a directory the user says to exclude.
 | 
						|
#  c) Determine the language for each file in the list.
 | 
						|
#
 | 
						|
my @binary_archive = ();
 | 
						|
my $cwd            = cwd();
 | 
						|
if ($opt_extract_with) {
 | 
						|
#print "cwd main = [$cwd]\n";
 | 
						|
    my @extract_location = ();
 | 
						|
    foreach my $bin_file (@ARGV) {
 | 
						|
        my $extract_dir = undef;
 | 
						|
        if ($opt_sdir) {
 | 
						|
            ++$TEMP_OFF;
 | 
						|
            $extract_dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
            File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | 
						|
            File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | 
						|
        } else {
 | 
						|
            $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | 
						|
        }
 | 
						|
        $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | 
						|
        print "mkdir $extract_dir\n"  if $opt_v;
 | 
						|
        print "cd    $extract_dir\n"  if $opt_v;
 | 
						|
        chdir $extract_dir;
 | 
						|
        my $bin_file_full_path = "";
 | 
						|
        if (File::Spec->file_name_is_absolute( $bin_file )) {
 | 
						|
            $bin_file_full_path = $bin_file;
 | 
						|
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
 | 
						|
        } else {
 | 
						|
            $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
 | 
						|
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
 | 
						|
        }
 | 
						|
        my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
 | 
						|
        print  $extract_cmd, "\n" if $opt_v;
 | 
						|
        system $extract_cmd;
 | 
						|
        push @extract_location, $extract_dir;
 | 
						|
        chdir $cwd;
 | 
						|
    }
 | 
						|
    # It is possible that the binary archive itself contains additional
 | 
						|
    # files compressed the same way (true for Java .ear files).  Go
 | 
						|
    # through all the files that were extracted, see if they are binary
 | 
						|
    # archives and try to extract them.  Lather, rinse, repeat.
 | 
						|
    my $binary_archives_exist = 1;
 | 
						|
    my $count_binary_archives = 0;
 | 
						|
    my $previous_count        = 0;
 | 
						|
    my $n_pass                = 0;
 | 
						|
    while ($binary_archives_exist) {
 | 
						|
        @binary_archive = ();
 | 
						|
        foreach my $dir (@extract_location) {
 | 
						|
            find(\&archive_files, $dir);  # populates global @binary_archive
 | 
						|
        }
 | 
						|
        foreach my $archive (@binary_archive) {
 | 
						|
            my $extract_dir = undef;
 | 
						|
            if ($opt_sdir) {
 | 
						|
                ++$TEMP_OFF;
 | 
						|
                $extract_dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | 
						|
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | 
						|
            } else {
 | 
						|
                $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | 
						|
            }
 | 
						|
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | 
						|
            print "mkdir $extract_dir\n"  if $opt_v;
 | 
						|
            print "cd    $extract_dir\n"  if $opt_v;
 | 
						|
            chdir  $extract_dir;
 | 
						|
 | 
						|
            my     $extract_cmd = uncompress_archive_cmd($archive);
 | 
						|
            print  $extract_cmd, "\n" if $opt_v;
 | 
						|
            system $extract_cmd;
 | 
						|
            push @extract_location, $extract_dir;
 | 
						|
            unlink $archive;  # otherwise will be extracting it forever
 | 
						|
        }
 | 
						|
        $count_binary_archives = scalar @binary_archive;
 | 
						|
        if ($count_binary_archives == $previous_count) {
 | 
						|
            $binary_archives_exist = 0;
 | 
						|
        }
 | 
						|
        $previous_count = $count_binary_archives;
 | 
						|
    }
 | 
						|
    chdir $cwd;
 | 
						|
 | 
						|
    @ARGV = @extract_location;
 | 
						|
} else {
 | 
						|
    # see if any of the inputs need to be auto-uncompressed &/or expanded
 | 
						|
    my @updated_ARGS = ();
 | 
						|
    replace_git_hash_with_tarfile(\@ARGV, \@git_similarity) if $opt_force_git;
 | 
						|
    foreach my $Arg (@ARGV) {
 | 
						|
        if (is_dir($Arg)) {
 | 
						|
            push @updated_ARGS, $Arg;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        my $full_path = "";
 | 
						|
        if (File::Spec->file_name_is_absolute( $Arg )) {
 | 
						|
            $full_path = $Arg;
 | 
						|
        } else {
 | 
						|
            $full_path = File::Spec->catfile( $cwd, $Arg );
 | 
						|
        }
 | 
						|
#print "full_path = [$full_path]\n";
 | 
						|
        my $extract_cmd = uncompress_archive_cmd($full_path);
 | 
						|
        if ($extract_cmd) {
 | 
						|
            my $extract_dir = undef;
 | 
						|
            if ($opt_sdir) {
 | 
						|
                ++$TEMP_OFF;
 | 
						|
                $extract_dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
                File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | 
						|
                File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | 
						|
            } else {
 | 
						|
                $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
 | 
						|
            }
 | 
						|
            $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | 
						|
            print "mkdir $extract_dir\n"  if $opt_v;
 | 
						|
            print "cd    $extract_dir\n"  if $opt_v;
 | 
						|
            chdir  $extract_dir;
 | 
						|
            print  $extract_cmd, "\n" if $opt_v;
 | 
						|
            system $extract_cmd;
 | 
						|
            push @updated_ARGS, $extract_dir;
 | 
						|
            chdir $cwd;
 | 
						|
        } else {
 | 
						|
            # this is a conventional, uncompressed, unarchived file
 | 
						|
            # or a directory; keep as-is
 | 
						|
            push @updated_ARGS, $Arg;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    @ARGV = @updated_ARGS;
 | 
						|
 | 
						|
    # make sure we're not counting any directory containing
 | 
						|
    # temporary installations of Regexp::Common, Algorithm::Diff
 | 
						|
    foreach my $d (sort keys %TEMP_INST) {
 | 
						|
        foreach my $a (@ARGV) {
 | 
						|
            next unless is_dir($a);
 | 
						|
            if ($opt_v > 2) {
 | 
						|
                printf "Comparing %s (location of %s) to input [%s]\n",
 | 
						|
                        $d, $TEMP_INST{$d}, $a;
 | 
						|
            }
 | 
						|
            if ($a eq $d) {
 | 
						|
                die "File::Temp::tempdir chose directory ",
 | 
						|
                    $d, " to install ", $TEMP_INST{$d}, " but this ",
 | 
						|
                    "matches one of your input directories.  Rerun ",
 | 
						|
                    "with --sdir and supply a different temporary ",
 | 
						|
                    "directory for ", $TEMP_INST{$d}, "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
my @Errors    = ();
 | 
						|
my @file_list = ();  # global variable updated in files()
 | 
						|
my %Ignored   = ();  # files that are not counted (language not recognized or
 | 
						|
                     # problems reading the file)
 | 
						|
my @Lines_Out = ();
 | 
						|
my %upper_lower_map = ();  # global variable (needed only on Windows) to
 | 
						|
                           # track case of original filename, populated in
 | 
						|
                           # make_file_list() if $ON_WINDOWS
 | 
						|
if ($opt_diff) {
 | 
						|
# Step 4:  Separate code from non-code files.  {{{1
 | 
						|
my @fh            = ();
 | 
						|
my @files_for_set = ();
 | 
						|
my @files_added_tot = ();
 | 
						|
my @files_removed_tot = ();
 | 
						|
my @file_pairs_tot = ();
 | 
						|
# make file lists for each separate argument
 | 
						|
if ($opt_diff_list_file) {
 | 
						|
    @files_for_set = ( (), () );
 | 
						|
    file_pairs_from_file($opt_diff_list_file, # in
 | 
						|
                        \@files_added_tot   , # out
 | 
						|
                        \@files_removed_tot , # out
 | 
						|
                        \@file_pairs_tot    , # out
 | 
						|
                       );
 | 
						|
    foreach my $F (@files_added_tot) {
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            (my $lc = lc $F) =~ s{\\}{/}g;
 | 
						|
            $upper_lower_map{$lc} = $F;
 | 
						|
            $F = $lc;
 | 
						|
        }
 | 
						|
        push @{$files_for_set[1]}, $F;
 | 
						|
    }
 | 
						|
    foreach my $F (@files_removed_tot) {
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            (my $lc = lc $F) =~ s{\\}{/}g;
 | 
						|
            $upper_lower_map{$lc} = $F;
 | 
						|
            $F = $lc;
 | 
						|
        }
 | 
						|
        push @{$files_for_set[0]}, $F;
 | 
						|
    }
 | 
						|
    foreach my $pair (@file_pairs_tot) {
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            push @{$files_for_set[0]}, lc $pair->[0];
 | 
						|
            push @{$files_for_set[1]}, lc $pair->[1];
 | 
						|
        } else {
 | 
						|
            push @{$files_for_set[0]}, $pair->[0];
 | 
						|
            push @{$files_for_set[1]}, $pair->[1];
 | 
						|
        }
 | 
						|
    }
 | 
						|
    @ARGV = (1, 2); # place holders
 | 
						|
}
 | 
						|
for (my $i = 0; $i < scalar @ARGV; $i++) {
 | 
						|
    if ($opt_diff_list_file) {
 | 
						|
        push @fh, make_file_list($files_for_set[$i], $i+1,
 | 
						|
                                \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
        @{$files_for_set[$i]} = @file_list;
 | 
						|
    } elsif ($opt_diff_list_files) {
 | 
						|
        my @list_files = read_list_file($ARGV[$i]);
 | 
						|
        push @fh, make_file_list(\@list_files, $i+1,
 | 
						|
                                \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
        @{$files_for_set[$i]} = @file_list;
 | 
						|
    } else {
 | 
						|
        push @fh, make_file_list([ $ARGV[$i] ], $i+1,
 | 
						|
                                \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
        @{$files_for_set[$i]} = @file_list;
 | 
						|
    }
 | 
						|
    if ($opt_exclude_list_file) {
 | 
						|
        # note: process_exclude_list_file() references global @file_list
 | 
						|
        process_exclude_list_file($opt_exclude_list_file,
 | 
						|
                                 \%Exclude_Dir,
 | 
						|
                                 \%Ignored);
 | 
						|
    }
 | 
						|
    if ($opt_no_autogen) {
 | 
						|
        exclude_autogenerated_files(\@{$files_for_set[$i]},  # in/out
 | 
						|
                                    \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
    }
 | 
						|
    @file_list = ();
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
# Step 5:  Remove duplicate files.             {{{1
 | 
						|
#
 | 
						|
my %Language           = ();
 | 
						|
my %unique_source_file = ();
 | 
						|
my $n_set = 0;
 | 
						|
foreach my $FH (@fh) {  # loop over each pair of file sets
 | 
						|
    ++$n_set;
 | 
						|
    remove_duplicate_files($FH,
 | 
						|
                               \%{$Language{$FH}}               ,
 | 
						|
                               \%{$unique_source_file{$FH}}     ,
 | 
						|
                          \%Error_Codes                         ,
 | 
						|
                               \@Errors                         ,
 | 
						|
                               \%Ignored                        );
 | 
						|
    if ($opt_exclude_content) {
 | 
						|
        exclude_by_regex($opt_exclude_content,              # in
 | 
						|
                        \%{$unique_source_file{$FH}},       # in/out
 | 
						|
                        \%Ignored);                         # out
 | 
						|
    } elsif ($opt_include_content) {
 | 
						|
        include_by_regex($opt_include_content,              # in
 | 
						|
                        \%{$unique_source_file{$FH}},       # in/out
 | 
						|
                        \%Ignored);                         # out
 | 
						|
    }
 | 
						|
 | 
						|
    if ($opt_include_lang) {
 | 
						|
        # remove files associated with languages not
 | 
						|
        # specified by --include-lang
 | 
						|
        my @delete_file = ();
 | 
						|
        foreach my $file (keys %{$unique_source_file{$FH}}) {
 | 
						|
            my $keep_file = 0;
 | 
						|
            foreach my $keep_lang (keys %Include_Language) {
 | 
						|
                if (lc($Language{$FH}{$file}) eq $keep_lang) {
 | 
						|
                    $keep_file = 1;
 | 
						|
                    last;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            next if $keep_file;
 | 
						|
            push @delete_file, $file;
 | 
						|
        }
 | 
						|
        foreach my $file (@delete_file) {
 | 
						|
            delete $Language{$FH}{$file};
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    printf "%2d: %8d unique file%s.                          \r",
 | 
						|
        $n_set,
 | 
						|
        plural_form(scalar keys %unique_source_file)
 | 
						|
        unless $opt_quiet;
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
# Step 6:  Count code, comments, blank lines.  {{{1
 | 
						|
#
 | 
						|
my %Results_by_Language = ();
 | 
						|
my %Results_by_File     = ();
 | 
						|
my %Delta_by_Language   = ();
 | 
						|
my %Delta_by_File       = ();
 | 
						|
 | 
						|
my %alignment = ();
 | 
						|
 | 
						|
my $fset_a = $fh[0];
 | 
						|
my $fset_b = $fh[1];
 | 
						|
 | 
						|
my $n_filepairs_compared = 0;
 | 
						|
my $tot_counted = 0;
 | 
						|
 | 
						|
if ( scalar @fh != 2 ) {
 | 
						|
    print "Error: incorrect length fh array when preparing diff at step 6.\n";
 | 
						|
    exit 1;
 | 
						|
}
 | 
						|
if (!$opt_diff_list_file) {
 | 
						|
    align_by_pairs(\%{$unique_source_file{$fset_a}}      , # in
 | 
						|
                   \%{$unique_source_file{$fset_b}}      , # in
 | 
						|
                   \@files_added_tot                     , # out
 | 
						|
                   \@files_removed_tot                   , # out
 | 
						|
                   \@file_pairs_tot                      , # out
 | 
						|
                  );
 | 
						|
}
 | 
						|
 | 
						|
#use Data::Dumper;
 | 
						|
#print "added : ", Dumper(\@files_added_tot);
 | 
						|
#print "removed : ", Dumper(\@files_removed_tot);
 | 
						|
#print "pairs : ", Dumper(\@file_pairs_tot);
 | 
						|
 | 
						|
if ( $max_processes == 0) {
 | 
						|
    # Multiprocessing is disabled
 | 
						|
    my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
 | 
						|
                               \@files_removed_tot, \@file_pairs_tot,
 | 
						|
                               0, \%Language, \%Ignored);
 | 
						|
    %Results_by_File = %{$part->{'results_by_file'}};
 | 
						|
    %Results_by_Language= %{$part->{'results_by_language'}};
 | 
						|
    %Delta_by_File = %{$part->{'delta_by_file'}};
 | 
						|
    %Delta_by_Language= %{$part->{'delta_by_language'}};
 | 
						|
    %Ignored = ( %Ignored, %{$part->{'ignored'}});
 | 
						|
    %alignment = %{$part->{'alignment'}};
 | 
						|
    $n_filepairs_compared = $part->{'n_filepairs_compared'};
 | 
						|
    push ( @Errors, @{$part->{'errors'}});
 | 
						|
} else {
 | 
						|
    # Multiprocessing is enabled
 | 
						|
    # Do not create more processes than the amount of data to be processed
 | 
						|
    my $num_processes = min(max(scalar @files_added_tot,
 | 
						|
                                scalar @files_removed_tot,
 | 
						|
                                scalar @file_pairs_tot),
 | 
						|
                            $max_processes);
 | 
						|
    # ... but use at least one process.
 | 
						|
       $num_processes = 1
 | 
						|
            if $num_processes == 0;
 | 
						|
    # Start processes for counting
 | 
						|
    my $pm = Parallel::ForkManager->new($num_processes);
 | 
						|
    # When processes finish, they will use the embedded subroutine for
 | 
						|
    # merging the data into global variables.
 | 
						|
    $pm->run_on_finish ( sub {
 | 
						|
        my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 | 
						|
        my $part_ignored = $part->{'ignored'};
 | 
						|
        my $part_result_by_file = $part->{'results_by_file'};
 | 
						|
        my $part_result_by_language = $part->{'results_by_language'};
 | 
						|
        my $part_delta_by_file = $part->{'delta_by_file'};
 | 
						|
        my $part_delta_by_language = $part->{'delta_by_language'};
 | 
						|
        my $part_alignment = $part->{'alignment'};
 | 
						|
        my $part_errors = $part->{'errors'};
 | 
						|
           $tot_counted += scalar keys %$part_result_by_file;
 | 
						|
           $n_filepairs_compared += $part->{'n_filepairs_compared'};
 | 
						|
        # Since files are processed by multiple processes, we can't measure
 | 
						|
        # the number of processed files exactly. We approximate this by showing
 | 
						|
        # the number of files counted by finished processes.
 | 
						|
        printf "Counting:  %d\r", $tot_counted
 | 
						|
                 if $opt_progress_rate;
 | 
						|
 | 
						|
        foreach my $this_language ( keys %$part_result_by_language ) {
 | 
						|
            my $counts = $part_result_by_language->{$this_language};
 | 
						|
            foreach my $inner_key ( keys %$counts ) {
 | 
						|
                $Results_by_Language{$this_language}{$inner_key} +=
 | 
						|
                    $counts->{$inner_key};
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        foreach my $this_language ( keys %$part_delta_by_language ) {
 | 
						|
            my $counts = $part_delta_by_language->{$this_language};
 | 
						|
            foreach my $inner_key ( keys %$counts ) {
 | 
						|
                my $statuses = $counts->{$inner_key};
 | 
						|
                foreach my $inner_status ( keys %$statuses ) {
 | 
						|
                    $Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
 | 
						|
                          $counts->{$inner_key}->{$inner_status};
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        foreach my $label ( keys %$part_alignment ) {
 | 
						|
            my $inner = $part_alignment->{$label};
 | 
						|
            foreach my $key ( keys %$inner ) {
 | 
						|
                $alignment{$label}{$key} = 1;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 | 
						|
        %Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
 | 
						|
        %Ignored = (%Ignored, %$part_ignored );
 | 
						|
        push ( @Errors, @$part_errors );
 | 
						|
    } );
 | 
						|
 | 
						|
    my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );
 | 
						|
    my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
 | 
						|
    my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
 | 
						|
 | 
						|
    while ( 1 ) {
 | 
						|
        my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;
 | 
						|
        my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
 | 
						|
        my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
 | 
						|
        if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
 | 
						|
             scalar @filepairs_part == 0 ) {
 | 
						|
            last;
 | 
						|
        }
 | 
						|
 | 
						|
        $pm->start() and next;
 | 
						|
        my $count_result = count_filesets ( $fset_a, $fset_b,
 | 
						|
            \@files_added_part, \@files_removed_part,
 | 
						|
            \@filepairs_part, 1, \%Language, \%Ignored );
 | 
						|
        $pm->finish(0 , $count_result);
 | 
						|
    }
 | 
						|
    # Wait for processes to finish
 | 
						|
    $pm->wait_all_children();
 | 
						|
}
 | 
						|
 | 
						|
# Write alignment data, if needed
 | 
						|
if ($opt_diff_alignment) {
 | 
						|
    write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
 | 
						|
}
 | 
						|
 | 
						|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
 | 
						|
my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
 | 
						|
write_file($opt_ignored, {"file_type" => "ignored",
 | 
						|
                          "separator" => ": ",
 | 
						|
                          "columns"   => ["file", "reason"],
 | 
						|
                         }, @ignored_reasons   ) if $opt_ignored;
 | 
						|
write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 | 
						|
# 1}}}
 | 
						|
# Step 7:  Assemble results.                   {{{1
 | 
						|
#
 | 
						|
my $end_time = get_time();
 | 
						|
printf "%8d file%s ignored.                           \n",
 | 
						|
    plural_form(scalar keys %Ignored) unless $opt_quiet;
 | 
						|
print_errors(\%Error_Codes, \@Errors) if @Errors;
 | 
						|
if (!%Delta_by_Language) {
 | 
						|
    print "Nothing to count.\n";
 | 
						|
    exit;
 | 
						|
}
 | 
						|
 | 
						|
if ($opt_by_file) {
 | 
						|
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 | 
						|
                            "by file",
 | 
						|
                            \%Delta_by_File, \%Scale_Factor);
 | 
						|
} else {
 | 
						|
    @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 | 
						|
                            "by language",
 | 
						|
                            \%Delta_by_Language, \%Scale_Factor);
 | 
						|
}
 | 
						|
 | 
						|
# 1}}}
 | 
						|
} else {
 | 
						|
# Step 4:  Separate code from non-code files.  {{{1
 | 
						|
my $fh = 0;
 | 
						|
if ($opt_list_file or $opt_diff_list_files or $opt_vcs) {
 | 
						|
    my @list;
 | 
						|
    if ($opt_vcs) {
 | 
						|
        @list = invoke_generator($opt_vcs, \@ARGV);
 | 
						|
    } elsif ($opt_list_file) {
 | 
						|
        @list = read_list_file($opt_list_file);
 | 
						|
    } else {
 | 
						|
        @list = read_list_file($ARGV[0]);
 | 
						|
    }
 | 
						|
    $fh = make_file_list(\@list, 0, \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
} else {
 | 
						|
    $fh = make_file_list(\@ARGV, 0, \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
    #     make_file_list populates global variable @file_list via call to
 | 
						|
    #     File::Find's find() which in turn calls files()
 | 
						|
}
 | 
						|
if ($opt_exclude_list_file) {
 | 
						|
    # note: process_exclude_list_file() references global @file_list
 | 
						|
    process_exclude_list_file($opt_exclude_list_file,
 | 
						|
                             \%Exclude_Dir,
 | 
						|
                             \%Ignored);
 | 
						|
}
 | 
						|
if ($opt_skip_win_hidden and $ON_WINDOWS) {
 | 
						|
    my @file_list_minus_hidden = ();
 | 
						|
    # eval code to run on Unix without 'missing Win32::File module' error.
 | 
						|
    my $win32_file_invocation = '
 | 
						|
        use Win32::File;
 | 
						|
        foreach my $F (@file_list) {
 | 
						|
            my $attr = undef;
 | 
						|
            Win32::File::GetAttributes($F, $attr);
 | 
						|
            if ($attr & HIDDEN) {
 | 
						|
                $Ignored{$F} = "Windows hidden file";
 | 
						|
                print "Ignoring $F since it is a Windows hidden file\n"
 | 
						|
                    if $opt_v > 1;
 | 
						|
            } else {
 | 
						|
                push @file_list_minus_hidden, $F;
 | 
						|
            }
 | 
						|
        }';
 | 
						|
    eval $win32_file_invocation;
 | 
						|
    @file_list = @file_list_minus_hidden;
 | 
						|
}
 | 
						|
if ($opt_no_autogen) {
 | 
						|
    exclude_autogenerated_files(\@file_list,  # in/out
 | 
						|
                                \%Error_Codes, \@Errors, \%Ignored);
 | 
						|
}
 | 
						|
#printf "%8d file%s excluded.                     \n",
 | 
						|
#   plural_form(scalar keys %Ignored)
 | 
						|
#   unless $opt_quiet;
 | 
						|
# die print ": ", join("\n: ", @file_list), "\n";
 | 
						|
# 1}}}
 | 
						|
# Step 5:  Remove duplicate files.             {{{1
 | 
						|
#
 | 
						|
my %Language           = ();
 | 
						|
my %unique_source_file = ();
 | 
						|
remove_duplicate_files($fh                          ,   # in
 | 
						|
                           \%Language               ,   # out
 | 
						|
                           \%unique_source_file     ,   # out
 | 
						|
                      \%Error_Codes                 ,   # in
 | 
						|
                           \@Errors                 ,   # out
 | 
						|
                           \%Ignored                );  # out
 | 
						|
if ($opt_exclude_content) {
 | 
						|
    exclude_by_regex($opt_exclude_content,              # in
 | 
						|
                    \%unique_source_file ,              # in/out
 | 
						|
                    \%Ignored);                         # out
 | 
						|
} elsif ($opt_include_content) {
 | 
						|
    include_by_regex($opt_include_content,              # in
 | 
						|
                    \%unique_source_file ,              # in/out
 | 
						|
                    \%Ignored);                         # out
 | 
						|
}
 | 
						|
printf "%8d unique file%s.                              \n",
 | 
						|
    plural_form(scalar keys %unique_source_file)
 | 
						|
    unless $opt_quiet;
 | 
						|
# 1}}}
 | 
						|
# Step 6:  Count code, comments, blank lines.  {{{1
 | 
						|
#
 | 
						|
my %Results_by_Language = ();
 | 
						|
my %Results_by_File     = ();
 | 
						|
my @results_parts  = ();
 | 
						|
my @sorted_files = sort keys %unique_source_file;
 | 
						|
 | 
						|
if ( $max_processes == 0) {
 | 
						|
    # Multiprocessing is disabled
 | 
						|
    my $part = count_files ( \@sorted_files , 0, \%Language);
 | 
						|
    %Results_by_File = %{$part->{'results_by_file'}};
 | 
						|
    %Results_by_Language= %{$part->{'results_by_language'}};
 | 
						|
    %Ignored = ( %Ignored, %{$part->{'ignored'}});
 | 
						|
    push ( @Errors, @{$part->{'errors'}});
 | 
						|
} else {
 | 
						|
    # Do not create more processes than the number of files to be processed
 | 
						|
    my $num_files = scalar @sorted_files;
 | 
						|
    my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
 | 
						|
    # Use at least one process.
 | 
						|
       $num_processes = 1
 | 
						|
            if $num_processes == 0;
 | 
						|
    # Start processes for counting
 | 
						|
    my $pm = Parallel::ForkManager->new($num_processes);
 | 
						|
    # When processes finish, they will use the embedded subroutine for
 | 
						|
    # merging the data into global variables.
 | 
						|
    $pm->run_on_finish ( sub {
 | 
						|
        my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 | 
						|
        my $part_ignored = $part->{'ignored'};
 | 
						|
        my $part_result_by_file = $part->{'results_by_file'};
 | 
						|
        my $part_result_by_language = $part->{'results_by_language'};
 | 
						|
        my $part_errors = $part->{'errors'};
 | 
						|
        my $nCounted+= scalar keys %$part_result_by_file;
 | 
						|
        # Since files are processed by multiple processes, we can't measure
 | 
						|
        # the number of processed files exactly. We approximate this by showing
 | 
						|
        # the number of files counted by finished processes.
 | 
						|
        printf "Counting:  %d\r", $nCounted
 | 
						|
                 if $opt_progress_rate;
 | 
						|
 | 
						|
        foreach my $this_language ( keys %$part_result_by_language ) {
 | 
						|
            my $counts = $part_result_by_language->{$this_language};
 | 
						|
            foreach my $inner_key ( keys %$counts ) {
 | 
						|
                $Results_by_Language{$this_language}{$inner_key} +=
 | 
						|
                    $counts->{$inner_key};
 | 
						|
            }
 | 
						|
        }
 | 
						|
        %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 | 
						|
        %Ignored = (%Ignored, %$part_ignored);
 | 
						|
        push ( @Errors, @$part_errors);
 | 
						|
    } );
 | 
						|
    my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
 | 
						|
    while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
 | 
						|
        $pm->start() and next;
 | 
						|
        my $count_result = count_files ( \@part, 1, \%Language );
 | 
						|
        $pm->finish(0 , $count_result);
 | 
						|
    }
 | 
						|
    # Wait for processes to finish
 | 
						|
    $pm->wait_all_children();
 | 
						|
}
 | 
						|
 | 
						|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
 | 
						|
my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
 | 
						|
write_file($opt_ignored, {"file_type" => "ignored",
 | 
						|
                          "separator" => $separator,
 | 
						|
                          "columns"   => ["file", "reason"],
 | 
						|
                         }, @ignored_reasons   ) if $opt_ignored;
 | 
						|
if ($opt_summary_cutoff) {
 | 
						|
    %Results_by_Language = apply_cutoff($opt_summary_cutoff,
 | 
						|
                                       \%Results_by_Language);
 | 
						|
}
 | 
						|
write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 | 
						|
# 1}}}
 | 
						|
# Step 7:  Assemble results.                   {{{1
 | 
						|
#
 | 
						|
my $end_time = get_time();
 | 
						|
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
 | 
						|
    unless $opt_quiet;
 | 
						|
print_errors(\%Error_Codes, \@Errors) if @Errors;
 | 
						|
if (!%Results_by_Language) {
 | 
						|
    write_null_results($opt_json, $opt_xml, $opt_report_file);
 | 
						|
    exit;
 | 
						|
}
 | 
						|
 | 
						|
generate_sql($end_time - $start_time,
 | 
						|
            \%Results_by_File, \%Scale_Factor) if $opt_sql;
 | 
						|
 | 
						|
exit if $skip_generate_report;
 | 
						|
if      ($opt_by_file_by_lang) {
 | 
						|
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | 
						|
                                      "by file",
 | 
						|
                                      \%Results_by_File,    \%Scale_Factor);
 | 
						|
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | 
						|
                                      "by language",
 | 
						|
                                      \%Results_by_Language, \%Scale_Factor);
 | 
						|
} elsif ($opt_by_file) {
 | 
						|
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | 
						|
                                      "by file",
 | 
						|
                                      \%Results_by_File,    \%Scale_Factor);
 | 
						|
} else {
 | 
						|
    push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | 
						|
                                      "by language",
 | 
						|
                                      \%Results_by_Language, \%Scale_Factor);
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
}
 | 
						|
if ($opt_fmt) {
 | 
						|
    my $json_string = "";
 | 
						|
    write_file(\$json_string, {}, @Lines_Out);
 | 
						|
    my ($file_len, $lang_len, $header, %contents) = load_json($json_string);
 | 
						|
    @Lines_Out = print_format_n(abs($opt_fmt), $file_len, $lang_len, $header, %contents);
 | 
						|
}
 | 
						|
if ($opt_report_file) {
 | 
						|
    write_file($opt_report_file, {}, @Lines_Out);
 | 
						|
} else {
 | 
						|
    if ($opt_fmt) {
 | 
						|
        print "@Lines_Out";
 | 
						|
    } else {
 | 
						|
        print "\n" unless $opt_quiet;
 | 
						|
        print join("\n", @Lines_Out), "\n";
 | 
						|
    }
 | 
						|
}
 | 
						|
if ($opt_count_diff) {
 | 
						|
    ++$opt_count_diff;
 | 
						|
    exit if $opt_count_diff > 3;
 | 
						|
    goto Top_of_Processing_Loop;
 | 
						|
}
 | 
						|
sub summary_cutoff_error {                   # {{{
 | 
						|
    my ($parameter) = @_;
 | 
						|
    print "-> summary_cutoff_is_ok($parameter)\n" if $opt_v > 2;
 | 
						|
    my %known_keys = ( 'c' => 1, 'f' => 1, 'm' => 1, 'cm' => 1 );
 | 
						|
    my $result = "";
 | 
						|
    my $by_pct = 0;
 | 
						|
    my ($key, $value);
 | 
						|
    if ($parameter !~ /:/) {
 | 
						|
        $result = "expected a colon in --summary-cutoff argument";
 | 
						|
    } else {
 | 
						|
        ($key, $value) = split(':', $parameter, 2);
 | 
						|
        if ($value =~ /%$/) {
 | 
						|
            $by_pct = 1;
 | 
						|
            $value =~ s/%$//;
 | 
						|
        }
 | 
						|
        if (!$known_keys{$key}) {
 | 
						|
            $result = "--summary-cutoff argument:  '$key' is not 'c', 'f', 'm' or 'cm'";
 | 
						|
        }
 | 
						|
        if ($value !~ /^\d+(\.\d*)?$/) {
 | 
						|
            $result = "--summary-cutoff argument:  '$value' is not a number";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- summary_cutoff_is_ok($result)\n" if $opt_v > 2;
 | 
						|
    return $result;
 | 
						|
} # 1}}}
 | 
						|
sub apply_cutoff {                           # {{{1
 | 
						|
    my ($criterion,
 | 
						|
        $rhh_by_lang) = @_;
 | 
						|
 | 
						|
    my %aggregated_Results_by_Language = ();
 | 
						|
    my $by_pct = 0;
 | 
						|
    my ($key, $value) = split(':', $criterion, 2);
 | 
						|
    if ($value =~ /%$/) {
 | 
						|
        $by_pct = 1;
 | 
						|
        $value =~ s/%$//;
 | 
						|
    }
 | 
						|
 | 
						|
    my %sum = ();
 | 
						|
    if ($by_pct) {
 | 
						|
        foreach my $lang (keys %{$rhh_by_lang}) {
 | 
						|
            foreach my $category (qw(nFiles comment blank code)) {
 | 
						|
                $sum{$category} += $rhh_by_lang->{$lang}{$category};
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if      ($key eq 'c') {
 | 
						|
            $value *= $sum{'code'}/100;
 | 
						|
        } elsif ($key eq 'f') {
 | 
						|
            $value *= $sum{'nFiles'}/100;
 | 
						|
        } elsif ($key eq 'm') {
 | 
						|
            $value *= $sum{'comment'}/100;
 | 
						|
        } elsif ($key eq 'cm') {
 | 
						|
            $value *= ($sum{'code'} + $sum{'comment'})/100;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    foreach my $lang (keys %{$rhh_by_lang}) {
 | 
						|
        my %sum = ();
 | 
						|
        my $agg_lang = $lang;
 | 
						|
        if      ($key eq 'c') {
 | 
						|
            $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'}    <= $value;
 | 
						|
        } elsif ($key eq 'f') {
 | 
						|
            $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'nFiles'}  <= $value;
 | 
						|
        } elsif ($key eq 'm') {
 | 
						|
            $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'comment'} <= $value;
 | 
						|
        } elsif ($key eq 'cm') {
 | 
						|
            $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'} +
 | 
						|
                                      $rhh_by_lang->{$lang}{'comment'} <= $value;
 | 
						|
        }
 | 
						|
        foreach my $category (qw(nFiles comment blank code)) {
 | 
						|
            $aggregated_Results_by_Language{$agg_lang}{$category} +=
 | 
						|
                $rhh_by_lang->{$lang}{$category};
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return %aggregated_Results_by_Language;
 | 
						|
} # 1}}}
 | 
						|
sub exclude_by_regex {                       # {{{1
 | 
						|
    my ($regex,
 | 
						|
        $rh_unique_source_file, # in/out
 | 
						|
        $rh_ignored           , # out
 | 
						|
       ) = @_;
 | 
						|
    my @exclude = ();
 | 
						|
    foreach my $file (keys %{$rh_unique_source_file}) {
 | 
						|
        my $line_num = 0;
 | 
						|
        foreach my $line (read_file($file)) {
 | 
						|
            ++$line_num;
 | 
						|
            if ($line =~ /$regex/) {
 | 
						|
                $rh_ignored->{$file} = "line $line_num match for --exclude-content=$regex";
 | 
						|
                push @exclude, $file;
 | 
						|
                last;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    foreach my $file (@exclude) {
 | 
						|
        delete $rh_unique_source_file->{$file};
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub include_by_regex {                       # {{{1
 | 
						|
    my ($regex,
 | 
						|
        $rh_unique_source_file, # in/out
 | 
						|
        $rh_ignored           , # out
 | 
						|
       ) = @_;
 | 
						|
    my @exclude = ();
 | 
						|
    foreach my $file (keys %{$rh_unique_source_file}) {
 | 
						|
        my $keep_this_one = 0;
 | 
						|
        foreach my $line (read_file($file)) {
 | 
						|
            if ($line =~ /$regex/) {
 | 
						|
                $keep_this_one = 1;
 | 
						|
                last;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (!$keep_this_one) {
 | 
						|
            $rh_ignored->{$file} = "does not satisfy --include-content=$regex";
 | 
						|
            push @exclude, $file;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    foreach my $file (@exclude) {
 | 
						|
        delete $rh_unique_source_file->{$file};
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub get_max_processes {                      # {{{1
 | 
						|
    # If user has specified valid number of processes, use that.
 | 
						|
    if (defined $opt_processes) {
 | 
						|
        eval "use Parallel::ForkManager 0.7.6;";
 | 
						|
        if ( defined $Parallel::ForkManager::VERSION ) {
 | 
						|
            $HAVE_Parallel_ForkManager = 1;
 | 
						|
        }
 | 
						|
        if ( $opt_processes !~ /^\d+$/ ) {
 | 
						|
            print "Error: processes option argument must be numeric.\n";
 | 
						|
            exit 1;
 | 
						|
        }
 | 
						|
        elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
 | 
						|
            print "Error: cannot use multiple processes, because " .
 | 
						|
                  "Parallel::ForkManager is not installed, or the version is too old.\n";
 | 
						|
            exit 1;
 | 
						|
        }
 | 
						|
    elsif ( $opt_processes >0 and $ON_WINDOWS ) {
 | 
						|
            print "Error: cannot use multiple processes on Windows systems.\n";
 | 
						|
            exit 1;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            return $opt_processes;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Disable multiprocessing on Windows - does not work reliably
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
 | 
						|
    # Disable multiprocessing if Parallel::ForkManager is not available
 | 
						|
    if ( ! $HAVE_Parallel_ForkManager ) {
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
 | 
						|
    # Set to number of cores on Linux
 | 
						|
    if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
 | 
						|
        my $numavcores_linux = `/usr/bin/nproc`;
 | 
						|
        chomp $numavcores_linux;
 | 
						|
        if ( $numavcores_linux =~ /^\d+$/ ) {
 | 
						|
            return $numavcores_linux;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Set to number of cores on macOS
 | 
						|
    if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
 | 
						|
       my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
 | 
						|
       chomp $numavcores_macos;
 | 
						|
       if ($numavcores_macos =~ /^\d+$/ ) {
 | 
						|
           return $numavcores_macos;
 | 
						|
       }
 | 
						|
    }
 | 
						|
 | 
						|
    # Disable multiprocessing in other cases
 | 
						|
    return 0;
 | 
						|
} # 1}}}
 | 
						|
sub exclude_autogenerated_files {            # {{{1
 | 
						|
    my ($ra_file_list, # in/out
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rh_Ignored  , # out
 | 
						|
       ) = @_;
 | 
						|
    print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
 | 
						|
    my @file_list_minus_autogen = ();
 | 
						|
    foreach my $file (@{$ra_file_list}) {
 | 
						|
        if ($file !~ /\.go$/ && $file !~ /\.ʕ◔ϖ◔ʔ$/) {
 | 
						|
            # at the moment, only know Go autogenerated files
 | 
						|
            push @file_list_minus_autogen, $file;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
 | 
						|
        if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
 | 
						|
            $rh_Ignored->{$file} = 'Go autogenerated file';
 | 
						|
        } else {
 | 
						|
            # Go, but not autogenerated
 | 
						|
            push @file_list_minus_autogen, $file;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    @{$ra_file_list} = @file_list_minus_autogen;
 | 
						|
 | 
						|
    if ($opt_force_git) {
 | 
						|
        my $repo_dir = git_root_dir();
 | 
						|
        my @file_list_minus_linguist = ();
 | 
						|
        # if there's a .gitattributes file, look for linguist-generated
 | 
						|
        # and linguist-vendored entries to ignore
 | 
						|
        my $GA = ".gitattributes";
 | 
						|
        if (-f $GA) {
 | 
						|
            foreach my $line (read_file($GA)) {
 | 
						|
                next unless $line =~ /^(.*?)\s+(linguist-(vendored|generated))/;
 | 
						|
                my $re = glob2regex($1);
 | 
						|
                foreach my $file (@{$ra_file_list}) {
 | 
						|
                    my $full_path = File::Spec->catfile($repo_dir, $file);
 | 
						|
                    my $rel_file  = File::Spec->abs2rel($full_path, $cwd);
 | 
						|
                    my $match = undef;
 | 
						|
                    if ($ON_WINDOWS) {
 | 
						|
                        $rel_file =~ s{\\}{/}g;
 | 
						|
                        $match = $rel_file =~ m{$re}i;
 | 
						|
                    } else {
 | 
						|
                        $match = $rel_file =~ m{$re};
 | 
						|
                    }
 | 
						|
                    if ($match) {
 | 
						|
#print "RULE [$rel_file] v [$re]\n";
 | 
						|
                        $rh_Ignored->{$file} = "matches $GA rule '$line'";
 | 
						|
                    } else {
 | 
						|
                        push @file_list_minus_linguist, $file;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub git_root_dir {                           # {{{1
 | 
						|
    # if in a git repo, return the repo's top level directory
 | 
						|
    my $cmd = "git rev-parse --show-toplevel";
 | 
						|
    print $cmd, "\n" if $opt_v > 1;
 | 
						|
    my $dir = undef;
 | 
						|
    chomp($dir = `$cmd`);
 | 
						|
    die "Not in a git repository" unless $dir
 | 
						|
} # 1}}}
 | 
						|
sub file_extension {                         # {{{1
 | 
						|
    my ($fname, ) = @_;
 | 
						|
    $fname =~ m/\.(\w+)$/;
 | 
						|
    if ($1) {
 | 
						|
        return $1;
 | 
						|
    } else {
 | 
						|
        return "";
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub count_files {                            # {{{1
 | 
						|
    my ($filelist, $counter_type, $language_hash) = @_;
 | 
						|
    print "-> count_files()\n" if $opt_v > 2;
 | 
						|
    my @p_errors = ();
 | 
						|
    my %p_ignored = ();
 | 
						|
    my %p_rbl = ();
 | 
						|
    my %p_rbf = ();
 | 
						|
    my %Language = %{$language_hash};
 | 
						|
 | 
						|
    foreach my $file (@$filelist) {
 | 
						|
        if ( ! $counter_type ) {
 | 
						|
            # Multithreading disabled
 | 
						|
            $nCounted++;
 | 
						|
 | 
						|
            printf "Counting:  %d\r", $nCounted
 | 
						|
                 unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
 | 
						|
        }
 | 
						|
 | 
						|
        next if $Ignored{$file};
 | 
						|
        if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
 | 
						|
            $p_ignored{$file} = "not in --include-ext=$opt_include_ext";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($opt_include_lang and not $Include_Language{lc($Language{$file})}) {
 | 
						|
            $p_ignored{$file} = "not in --include-lang=$opt_include_lang";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($Exclude_Language{$Language{$file}}) {
 | 
						|
            $p_ignored{$file} = "--exclude-lang=$Language{$file}";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($opt_force_lang_def and ($Language{$file} eq "XML") and
 | 
						|
            !defined $Filters_by_Language{XML}) {
 | 
						|
            # XML check is attempted for all unidentified text files.
 | 
						|
            # This can't be done if user forces language definition
 | 
						|
            # that excludes XML.  GH #596
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
 | 
						|
        if ($Filters_by_Language_Language_file) {
 | 
						|
            if ($Language{$file} eq "(unknown)") {
 | 
						|
                $p_ignored{$file} = "language unknown (#1)";
 | 
						|
            } else {
 | 
						|
                $p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
 | 
						|
            }
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my ($all_line_count, $blank_count, $comment_count, $code_count) = (0, 0, 0, 0);
 | 
						|
        if (!$opt_only_count_files) {
 | 
						|
            if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
 | 
						|
                chomp ($blank_count     = `grep -cv \"[^[:space:]]\" '$file'`);
 | 
						|
                chomp ($all_line_count  = `cat '$file' | wc -l`);
 | 
						|
                if      ($Language{$file} =~ /^(C|C\+\+)$/) {
 | 
						|
                    $code_count = `cat '$file' | c_count      | head -n 1`;
 | 
						|
                } elsif ($Language{$file} eq "XML") {
 | 
						|
                    $code_count = `cat '$file' | xml_count    | head -n 1`;
 | 
						|
                } elsif ($Language{$file} eq "PHP") {
 | 
						|
                    $code_count = `cat '$file' | php_count    | head -n 1`;
 | 
						|
                } elsif ($Language{$file} eq "Pascal") {
 | 
						|
                    $code_count = `cat '$file' | pascal_count | head -n 1`;
 | 
						|
                } elsif ($Language{$file} eq "Java") {
 | 
						|
                    $code_count = `cat '$file' | java_count   | head -n 1`;
 | 
						|
                } else {
 | 
						|
                    die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
 | 
						|
                }
 | 
						|
                $code_count = substr($code_count, 0, -2);
 | 
						|
                $comment_count = $all_line_count - $code_count - $blank_count;
 | 
						|
            } else {
 | 
						|
                ($all_line_count,
 | 
						|
                $blank_count   ,
 | 
						|
                $comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
 | 
						|
                $code_count = $all_line_count - $blank_count - $comment_count;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        if ($opt_by_file) {
 | 
						|
            $p_rbf{$file}{'code'   } = $code_count     ;
 | 
						|
            $p_rbf{$file}{'blank'  } = $blank_count    ;
 | 
						|
            $p_rbf{$file}{'comment'} = $comment_count  ;
 | 
						|
            $p_rbf{$file}{'lang'   } = $Language{$file};
 | 
						|
            $p_rbf{$file}{'nFiles' } = 1;
 | 
						|
        } else {
 | 
						|
            $p_rbf{$file} = 1;  # just keep track of counted files
 | 
						|
        }
 | 
						|
 | 
						|
        $p_rbl{$Language{$file}}{'nFiles'}++;
 | 
						|
        $p_rbl{$Language{$file}}{'code'}    += $code_count   ;
 | 
						|
        $p_rbl{$Language{$file}}{'blank'}   += $blank_count  ;
 | 
						|
        $p_rbl{$Language{$file}}{'comment'} += $comment_count;
 | 
						|
 | 
						|
    }
 | 
						|
    print "<- count_files()\n" if $opt_v > 2;
 | 
						|
    return {
 | 
						|
        "ignored" => \%p_ignored,
 | 
						|
        "errors"  => \@p_errors,
 | 
						|
        "results_by_file" => \%p_rbf,
 | 
						|
        "results_by_language" => \%p_rbl,
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub count_filesets {                         # {{{1
 | 
						|
    my ($fset_a,
 | 
						|
        $fset_b,
 | 
						|
        $files_added,
 | 
						|
        $files_removed,
 | 
						|
        $file_pairs,
 | 
						|
        $counter_type,
 | 
						|
        $language_hash,
 | 
						|
        $rh_Ignored) = @_;
 | 
						|
    print "-> count_filesets()\n" if $opt_v > 2;
 | 
						|
    my @p_errors = ();
 | 
						|
    my %p_alignment = ();
 | 
						|
    my %p_ignored = ();
 | 
						|
    my %p_rbl = ();
 | 
						|
    my %p_rbf = ();
 | 
						|
    my %p_dbl = ();
 | 
						|
    my %p_dbf = ();
 | 
						|
    my %Language = %$language_hash;
 | 
						|
 | 
						|
    my $nCounted = 0;
 | 
						|
 | 
						|
    my %already_counted = (); # already_counted{ filename } = 1
 | 
						|
 | 
						|
    if (!@$file_pairs) {
 | 
						|
        # Special case where all files were either added or deleted.
 | 
						|
        # In this case, one of these arrays will be empty:
 | 
						|
        #   @files_added, @files_removed
 | 
						|
        # so loop over both to cover both cases.
 | 
						|
        my $status = @$files_added ? 'added' : 'removed';
 | 
						|
        my $fset = @$files_added ? $fset_b : $fset_a;
 | 
						|
        foreach my $file (@$files_added, @$files_removed) {
 | 
						|
            next unless defined $Language{$fset}{$file};
 | 
						|
            my $Lang = $Language{$fset}{$file};
 | 
						|
            next if $Lang eq '(unknown)';
 | 
						|
            my ($all_line_count,
 | 
						|
                $blank_count   ,
 | 
						|
                $comment_count ,
 | 
						|
                ) = call_counter($file, $Lang, \@p_errors);
 | 
						|
            $already_counted{$file} = 1;
 | 
						|
            my $code_count = $all_line_count-$blank_count-$comment_count;
 | 
						|
            if ($opt_by_file) {
 | 
						|
                $p_dbf{$file}{'code'   }{$status} += $code_count   ;
 | 
						|
                $p_dbf{$file}{'blank'  }{$status} += $blank_count  ;
 | 
						|
                $p_dbf{$file}{'comment'}{$status} += $comment_count;
 | 
						|
                $p_dbf{$file}{'lang'   }{$status}  = $Lang         ;
 | 
						|
                $p_dbf{$file}{'nFiles' }{$status} += 1             ;
 | 
						|
            }
 | 
						|
            $p_dbl{$Lang}{'code'   }{$status} += $code_count   ;
 | 
						|
            $p_dbl{$Lang}{'blank'  }{$status} += $blank_count  ;
 | 
						|
            $p_dbl{$Lang}{'comment'}{$status} += $comment_count;
 | 
						|
            $p_dbl{$Lang}{'nFiles' }{$status} += 1             ;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    #use Data::Dumper::Simple;
 | 
						|
    #use Data::Dumper;
 | 
						|
    #print Dumper(\@files_added, \@files_removed, \@file_pairs);
 | 
						|
    #print "after align_by_pairs:\n";
 | 
						|
    #print "added:\n";
 | 
						|
 | 
						|
    foreach my $f (@$files_added) {
 | 
						|
        next if $already_counted{$f};
 | 
						|
        #printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
 | 
						|
        # Don't proceed unless the file (both L and R versions)
 | 
						|
        # is in a known language.
 | 
						|
        next if $opt_include_ext
 | 
						|
            and not $Include_Ext{ file_extension($f) };
 | 
						|
        if (!defined $Language{$fset_b}{$f}) {
 | 
						|
            $p_ignored{$f} = "excluded or unknown language";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        next if $opt_include_lang
 | 
						|
            and not $Include_Language{lc($Language{$fset_b}{$f})};
 | 
						|
        my $this_lang = $Language{$fset_b}{$f};
 | 
						|
        if (!defined  $Language{$fset_b}{$f}) {
 | 
						|
            # shouldn't happen but could get here if using
 | 
						|
            # --diff-list-file which bypasses earlier checks
 | 
						|
            $p_ignored{$f} = "empty or uncharacterizeable file";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($this_lang eq "(unknown)") {
 | 
						|
            $p_ignored{$f} = "unknown language";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($Exclude_Language{$this_lang}) {
 | 
						|
            $p_ignored{$f} = "--exclude-lang=$this_lang";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        $p_alignment{"added"}{sprintf "  + %s ; %s\n", $f, $this_lang} = 1;
 | 
						|
        ++$p_dbl{ $this_lang }{'nFiles'}{'added'};
 | 
						|
        # Additionally, add contents of file $f to
 | 
						|
        # Delta_by_File{$f}{comment/blank/code}{'added'}
 | 
						|
        # Delta_by_Language{$lang}{comment/blank/code}{'added'}
 | 
						|
        # via the $p_dbl and $p_dbf variables.
 | 
						|
        my ($all_line_count,
 | 
						|
            $blank_count   ,
 | 
						|
            $comment_count ,
 | 
						|
           ) = call_counter($f, $this_lang, \@p_errors);
 | 
						|
        $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
 | 
						|
        $p_dbl{ $this_lang }{'blank'}{'added'}   += $blank_count;
 | 
						|
        $p_dbl{ $this_lang }{'code'}{'added'}    +=
 | 
						|
           $all_line_count - $blank_count - $comment_count;
 | 
						|
        $p_dbf{ $f }{'comment'}{'added'} = $comment_count;
 | 
						|
        $p_dbf{ $f }{'blank'}{'added'}   = $blank_count;
 | 
						|
        $p_dbf{ $f }{'code'}{'added'}    =
 | 
						|
           $all_line_count - $blank_count - $comment_count;
 | 
						|
    }
 | 
						|
 | 
						|
    #print "removed:\n";
 | 
						|
    foreach my $f (@$files_removed) {
 | 
						|
        next if $already_counted{$f};
 | 
						|
        # Don't proceed unless the file (both L and R versions)
 | 
						|
        # is in a known language.
 | 
						|
        next if $opt_include_ext
 | 
						|
            and not $Include_Ext{ file_extension($f) };
 | 
						|
        next if $opt_include_lang
 | 
						|
            and (not defined $Language{$fset_a}{$f}
 | 
						|
             or  not defined $Include_Language{lc($Language{$fset_a}{$f})});
 | 
						|
        my $this_lang = $Language{$fset_a}{$f};
 | 
						|
        if ((not defined $this_lang) or ($this_lang eq "(unknown)")) {
 | 
						|
            $p_ignored{$f} = "unknown language";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($Exclude_Language{$this_lang}) {
 | 
						|
            $p_ignored{$f} = "--exclude-lang=$this_lang";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        ++$p_dbl{ $this_lang }{'nFiles'}{'removed'};
 | 
						|
        $p_alignment{"removed"}{sprintf "  - %s ; %s\n", $f, $this_lang} = 1;
 | 
						|
        #printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
 | 
						|
        # Additionally, add contents of file $f to
 | 
						|
        #        Delta_by_File{$f}{comment/blank/code}{'removed'}
 | 
						|
        #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
 | 
						|
        # via the $p_dbl and $p_dbf variables.
 | 
						|
        my ($all_line_count,
 | 
						|
            $blank_count   ,
 | 
						|
            $comment_count ,
 | 
						|
           ) = call_counter($f, $this_lang, \@p_errors);
 | 
						|
        $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
 | 
						|
        $p_dbl{ $this_lang}{'blank'}{'removed'}   += $blank_count;
 | 
						|
        $p_dbl{ $this_lang}{'code'}{'removed'}    +=
 | 
						|
             $all_line_count - $blank_count - $comment_count;
 | 
						|
        $p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
 | 
						|
        $p_dbf{ $f }{'blank'}{'removed'}   = $blank_count;
 | 
						|
        $p_dbf{ $f }{'code'}{'removed'}    =
 | 
						|
            $all_line_count - $blank_count - $comment_count;
 | 
						|
    }
 | 
						|
 | 
						|
    my $n_file_pairs_compared = 0;
 | 
						|
    # Don't know ahead of time how many file pairs will be compared
 | 
						|
    # since duplicates are weeded out below.  The answer is
 | 
						|
    # scalar @file_pairs only if there are no duplicates.
 | 
						|
 | 
						|
    foreach my $pair (@$file_pairs) {
 | 
						|
        my $file_L = $pair->[0];
 | 
						|
        my $file_R = $pair->[1];
 | 
						|
        my $Lang_L = $Language{$fset_a}{$file_L};
 | 
						|
        my $Lang_R = $Language{$fset_b}{$file_R};
 | 
						|
        if (!defined($Lang_L) or !defined($Lang_R)) {
 | 
						|
            print " -> count_filesets skipping $file_L, $file_R ",
 | 
						|
                  "because language cannot be inferred\n" if $opt_v;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        #print "main step 6 file_L=$file_L    file_R=$file_R\n";
 | 
						|
        ++$nCounted;
 | 
						|
        printf "Counting:  %d\r", $nCounted
 | 
						|
             unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
 | 
						|
        next if $p_ignored{$file_L} or $p_ignored{$file_R};
 | 
						|
 | 
						|
        # filter out non-included extensions
 | 
						|
        if ($opt_include_ext  and not $Include_Ext{ file_extension($file_L) }
 | 
						|
                              and not $Include_Ext{ file_extension($file_R) }) {
 | 
						|
            $p_ignored{$file_L} = "not in --include-ext=$opt_include_ext";
 | 
						|
            $p_ignored{$file_R} = "not in --include-ext=$opt_include_ext";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        # filter out non-included languages
 | 
						|
        if ($opt_include_lang and not $Include_Language{lc($Lang_L)}
 | 
						|
                              and not $Include_Language{lc($Lang_R)}) {
 | 
						|
            $p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
 | 
						|
            $p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        # filter out excluded or unrecognized languages
 | 
						|
        if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
 | 
						|
            $p_ignored{$file_L} = "--exclude-lang=$Lang_L";
 | 
						|
            $p_ignored{$file_R} = "--exclude-lang=$Lang_R";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $not_Filters_by_Language_Lang_LR = 0;
 | 
						|
        #print "file_LR = [$file_L] [$file_R]\n";
 | 
						|
        #print "Lang_LR = [$Lang_L] [$Lang_R]\n";
 | 
						|
        if (($Lang_L eq "(unknown)") or
 | 
						|
            ($Lang_R eq "(unknown)") or
 | 
						|
            !(@{$Filters_by_Language{$Lang_L} }) or
 | 
						|
            !(@{$Filters_by_Language{$Lang_R} })) {
 | 
						|
            $not_Filters_by_Language_Lang_LR = 1;
 | 
						|
        }
 | 
						|
        if ($not_Filters_by_Language_Lang_LR) {
 | 
						|
            if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
 | 
						|
                $p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
 | 
						|
                $p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
 | 
						|
            } else {
 | 
						|
                $p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
 | 
						|
                $p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
 | 
						|
            }
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        # filter out explicitly excluded files
 | 
						|
        if ($opt_exclude_list_file and
 | 
						|
            ($rh_Ignored->{$file_L} or $rh_Ignored->{$file_R})) {
 | 
						|
            my $msg_2;
 | 
						|
            if ($rh_Ignored->{$file_L}) {
 | 
						|
                $msg_2 = "$file_L (paired to $file_R)";
 | 
						|
            } else {
 | 
						|
                $msg_2 = "$file_R (paired to $file_L)";
 | 
						|
            }
 | 
						|
            my $msg_1 = "in --exclude-list-file=$opt_exclude_list_file";
 | 
						|
            $p_ignored{$file_L} = "$msg_1, $msg_2";
 | 
						|
            $p_ignored{$file_R} = "$msg_1, $msg_2";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        #print "DIFF($file_L, $file_R)\n";
 | 
						|
        # step 0: compare the two files' contents
 | 
						|
        chomp ( my @lines_L = read_file($file_L) );
 | 
						|
        chomp ( my @lines_R = read_file($file_R) );
 | 
						|
        my $language_file_L = "";
 | 
						|
        if (defined $Language{$fset_a}{$file_L}) {
 | 
						|
            $language_file_L = $Language{$fset_a}{$file_L};
 | 
						|
        } else {
 | 
						|
            # files $file_L and $file_R do not contain known language
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $contents_are_same = 1;
 | 
						|
        if (scalar @lines_L == scalar @lines_R) {
 | 
						|
            # same size, must compare line-by-line
 | 
						|
            for (my $i = 0; $i < scalar @lines_L; $i++) {
 | 
						|
               if ($lines_L[$i] ne $lines_R[$i]) {
 | 
						|
                   $contents_are_same = 0;
 | 
						|
                   last;
 | 
						|
               }
 | 
						|
            }
 | 
						|
            if ($contents_are_same) {
 | 
						|
                ++$p_dbl{$language_file_L}{'nFiles'}{'same'};
 | 
						|
            } else {
 | 
						|
                ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $contents_are_same = 0;
 | 
						|
            # different sizes, contents have changed
 | 
						|
            ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 | 
						|
        }
 | 
						|
 | 
						|
        if ($opt_diff_alignment) {
 | 
						|
            my $str =  "$file_L | $file_R ; $language_file_L";
 | 
						|
            if ($contents_are_same) {
 | 
						|
                $p_alignment{"pairs"}{"  == $str"} = 1;
 | 
						|
            } else {
 | 
						|
                $p_alignment{"pairs"}{"  != $str"} = 1;
 | 
						|
            }
 | 
						|
            ++$n_file_pairs_compared;
 | 
						|
        }
 | 
						|
 | 
						|
        my ($all_line_count_L, $blank_count_L   , $comment_count_L ,
 | 
						|
            $all_line_count_R, $blank_count_R   , $comment_count_R , )  = (0,0,0,0,0,0,);
 | 
						|
        if (!$contents_are_same) {
 | 
						|
            # step 1: identify comments in both files
 | 
						|
            #print "Diff blank removal L language= $Lang_L";
 | 
						|
            #print " scalar(lines_L)=", scalar @lines_L, "\n";
 | 
						|
            my @original_minus_blanks_L
 | 
						|
                    = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
 | 
						|
            #print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
 | 
						|
            @lines_L    = @original_minus_blanks_L;
 | 
						|
            #print "2: scalar(lines_L)=", scalar @lines_L, "\n";
 | 
						|
            @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
 | 
						|
            @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
 | 
						|
                                       \%EOL_Continuation_re);
 | 
						|
            #print "3: scalar(lines_L)=", scalar @lines_L, "\n";
 | 
						|
 | 
						|
            #print "Diff blank removal R language= $Lang_R\n";
 | 
						|
            my @original_minus_blanks_R
 | 
						|
                    = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
 | 
						|
            @lines_R    = @original_minus_blanks_R;
 | 
						|
            @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
 | 
						|
            @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
 | 
						|
                                       \%EOL_Continuation_re);
 | 
						|
 | 
						|
            my (@diff_LL, @diff_LR, );
 | 
						|
                   array_diff( $file_L                  ,   # in
 | 
						|
                       \@original_minus_blanks_L ,   # in
 | 
						|
                       \@lines_L                 ,   # in
 | 
						|
                       "comment"                 ,   # in
 | 
						|
                       \@diff_LL, \@diff_LR      ,   # out
 | 
						|
                       \@p_errors);                    # in/out
 | 
						|
 | 
						|
            my (@diff_RL, @diff_RR, );
 | 
						|
                    array_diff( $file_R                  ,   # in
 | 
						|
                       \@original_minus_blanks_R ,   # in
 | 
						|
                       \@lines_R                 ,   # in
 | 
						|
                       "comment"                 ,   # in
 | 
						|
                       \@diff_RL, \@diff_RR      ,   # out
 | 
						|
                       \@p_errors);                    # in/out
 | 
						|
            # each line of each file is now classified as
 | 
						|
            # code or comment
 | 
						|
            #use Data::Dumper;
 | 
						|
            #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 | 
						|
            #print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
 | 
						|
            #die;
 | 
						|
 | 
						|
            # step 2: separate code from comments for L and R files
 | 
						|
            my @code_L = ();
 | 
						|
            my @code_R = ();
 | 
						|
            my @comm_L = ();
 | 
						|
            my @comm_R = ();
 | 
						|
            foreach my $line_info (@diff_LL) {
 | 
						|
                if      ($line_info->{'type'} eq "code"   ) {
 | 
						|
                    push @code_L, $line_info->{char};
 | 
						|
                } elsif ($line_info->{'type'} eq "comment") {
 | 
						|
                    push @comm_L, $line_info->{char};
 | 
						|
                } else {
 | 
						|
                    die "Diff unexpected line type ",
 | 
						|
                        $line_info->{'type'}, "for $file_L line ",
 | 
						|
                        $line_info->{'lnum'};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            foreach my $line_info (@diff_RL) {
 | 
						|
                if      ($line_info->{type} eq "code"   ) {
 | 
						|
                    push @code_R, $line_info->{'char'};
 | 
						|
                } elsif ($line_info->{type} eq "comment") {
 | 
						|
                    push @comm_R, $line_info->{'char'};
 | 
						|
                } else {
 | 
						|
                    die "Diff unexpected line type ",
 | 
						|
                        $line_info->{'type'}, "for $file_R line ",
 | 
						|
                        $line_info->{'lnum'};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            if ($opt_ignore_whitespace) {
 | 
						|
                # strip all whitespace from each line of source code
 | 
						|
                # and comments then use these stripped arrays in diffs
 | 
						|
                foreach (@code_L) { s/\s+//g }
 | 
						|
                foreach (@code_R) { s/\s+//g }
 | 
						|
                foreach (@comm_L) { s/\s+//g }
 | 
						|
                foreach (@comm_R) { s/\s+//g }
 | 
						|
            }
 | 
						|
            if ($opt_ignore_case) {
 | 
						|
                # change all text to lowercase in diffs
 | 
						|
                foreach (@code_L) { $_ = lc }
 | 
						|
                foreach (@code_R) { $_ = lc }
 | 
						|
                foreach (@comm_L) { $_ = lc }
 | 
						|
                foreach (@comm_R) { $_ = lc }
 | 
						|
            }
 | 
						|
            # step 3: compute code diffs
 | 
						|
            array_diff("$file_L v. $file_R"   ,   # in
 | 
						|
                       \@code_L               ,   # in
 | 
						|
                       \@code_R               ,   # in
 | 
						|
                       "revision"             ,   # in
 | 
						|
                       \@diff_LL, \@diff_LR   ,   # out
 | 
						|
                       \@p_errors);                 # in/out
 | 
						|
            #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 | 
						|
            #print Dumper("diff_LR", \@diff_LR);
 | 
						|
            foreach my $line_info (@diff_LR) {
 | 
						|
                my $status = $line_info->{'desc'}; # same|added|removed|modified
 | 
						|
                ++$p_dbl{$Lang_L}{'code'}{$status};
 | 
						|
                if ($opt_by_file) {
 | 
						|
                    ++$p_dbf{$file_L}{'code'}{$status};
 | 
						|
                }
 | 
						|
            }
 | 
						|
            #use Data::Dumper;
 | 
						|
            #print Dumper("code diffs:", \@diff_LL, \@diff_LR);
 | 
						|
 | 
						|
            # step 4: compute comment diffs
 | 
						|
            array_diff("$file_L v. $file_R"   ,   # in
 | 
						|
                       \@comm_L               ,   # in
 | 
						|
                       \@comm_R               ,   # in
 | 
						|
                       "revision"             ,   # in
 | 
						|
                       \@diff_LL, \@diff_LR   ,   # out
 | 
						|
                       \@Errors);                 # in/out
 | 
						|
            #print Dumper("comment diff_LR", \@diff_LR);
 | 
						|
            foreach my $line_info (@diff_LR) {
 | 
						|
                my $status = $line_info->{'desc'}; # same|added|removed|modified
 | 
						|
                ++$p_dbl{$Lang_L}{'comment'}{$status};
 | 
						|
                if ($opt_by_file) {
 | 
						|
                    ++$p_dbf{$file_L}{'comment'}{$status};
 | 
						|
                }
 | 
						|
            }
 | 
						|
            #print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
 | 
						|
 | 
						|
            # step 5: compute difference in blank lines (kind of pointless)
 | 
						|
            next if $Lang_L eq '(unknown)' or
 | 
						|
                    $Lang_R eq '(unknown)';
 | 
						|
            ($all_line_count_L,
 | 
						|
             $blank_count_L   ,
 | 
						|
             $comment_count_L ,
 | 
						|
            ) = call_counter($file_L, $Lang_L, \@Errors);
 | 
						|
 | 
						|
            ($all_line_count_R,
 | 
						|
             $blank_count_R   ,
 | 
						|
             $comment_count_R ,
 | 
						|
            ) = call_counter($file_R, $Lang_R, \@Errors);
 | 
						|
        } else {
 | 
						|
            # L and R file contents are identical, no need to diff
 | 
						|
            ($all_line_count_L,
 | 
						|
             $blank_count_L   ,
 | 
						|
             $comment_count_L ,
 | 
						|
            ) = call_counter($file_L, $Lang_L, \@Errors);
 | 
						|
            $all_line_count_R = $all_line_count_L;
 | 
						|
            $blank_count_R    = $blank_count_L   ;
 | 
						|
            $comment_count_R  = $comment_count_L ;
 | 
						|
            my $code_lines_R  = $all_line_count_R - ($blank_count_R + $comment_count_R);
 | 
						|
            $p_dbl{$Lang_L}{'blank'}{'same'}   += $blank_count_R;
 | 
						|
            $p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
 | 
						|
            $p_dbl{$Lang_L}{'code'}{'same'}    += $code_lines_R;
 | 
						|
            if ($opt_by_file) {
 | 
						|
                $p_dbf{$file_L}{'blank'}{'same'}   += $blank_count_R;
 | 
						|
                $p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
 | 
						|
                $p_dbf{$file_L}{'code'}{'same'}    += $code_lines_R;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        if ($blank_count_L <  $blank_count_R) {
 | 
						|
            my $D = $blank_count_R - $blank_count_L;
 | 
						|
            $p_dbl{$Lang_L}{'blank'}{'added'}   += $D;
 | 
						|
        } else {
 | 
						|
            my $D = $blank_count_L - $blank_count_R;
 | 
						|
            $p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
 | 
						|
        }
 | 
						|
        if ($opt_by_file) {
 | 
						|
            if ($blank_count_L <  $blank_count_R) {
 | 
						|
                my $D = $blank_count_R - $blank_count_L;
 | 
						|
                $p_dbf{$file_L}{'blank'}{'added'}   += $D;
 | 
						|
            } else {
 | 
						|
                my $D = $blank_count_L - $blank_count_R;
 | 
						|
                $p_dbf{$file_L}{'blank'}{'removed'} += $D;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
 | 
						|
        if ($opt_by_file) {
 | 
						|
            $p_rbf{$file_L}{'code'   } = $code_count_L    ;
 | 
						|
            $p_rbf{$file_L}{'blank'  } = $blank_count_L   ;
 | 
						|
            $p_rbf{$file_L}{'comment'} = $comment_count_L ;
 | 
						|
            $p_rbf{$file_L}{'lang'   } = $Lang_L          ;
 | 
						|
            $p_rbf{$file_L}{'nFiles' } = 1                ;
 | 
						|
        } else {
 | 
						|
            $p_rbf{$file_L} = 1;  # just keep track of counted files
 | 
						|
        }
 | 
						|
 | 
						|
        $p_rbl{$Lang_L}{'nFiles'}++;
 | 
						|
        $p_rbl{$Lang_L}{'code'}    += $code_count_L   ;
 | 
						|
        $p_rbl{$Lang_L}{'blank'}   += $blank_count_L  ;
 | 
						|
        $p_rbl{$Lang_L}{'comment'} += $comment_count_L;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- count_filesets()\n" if $opt_v > 2;
 | 
						|
    return {
 | 
						|
        "ignored" => \%p_ignored,
 | 
						|
        "errors"  => \@p_errors,
 | 
						|
        "results_by_file" => \%p_rbf,
 | 
						|
        "results_by_language" => \%p_rbl,
 | 
						|
        "delta_by_file" => \%p_dbf,
 | 
						|
        "delta_by_language" => \%p_dbl,
 | 
						|
        "alignment" => \%p_alignment,
 | 
						|
        "n_filepairs_compared" => $n_file_pairs_compared
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub write_alignment_data {                   # {{{1
 | 
						|
    my ($filename, $n_filepairs_compared, $data ) = @_;
 | 
						|
    my @output = ();
 | 
						|
    if ( $data->{'added'} ) {
 | 
						|
        my %added_lines = %{$data->{'added'}};
 | 
						|
        push (@output, "Files added: " . (scalar keys %added_lines) . "\n");
 | 
						|
        foreach my $line ( sort keys %added_lines ) {
 | 
						|
            push (@output, $line);
 | 
						|
        }
 | 
						|
        push (@output, "\n" );
 | 
						|
    }
 | 
						|
    if ( $data->{'removed'} ) {
 | 
						|
        my %removed_lines = %{$data->{'removed'}};
 | 
						|
        push (@output, "Files removed: " . (scalar keys %removed_lines) . "\n");
 | 
						|
        foreach my $line ( sort keys %removed_lines ) {
 | 
						|
            push (@output, $line);
 | 
						|
        }
 | 
						|
        push (@output, "\n");
 | 
						|
    }
 | 
						|
    if ( $data->{'pairs'} ) {
 | 
						|
        my %pairs = %{$data->{'pairs'}};
 | 
						|
        push (@output, "File pairs compared: " . $n_filepairs_compared . "\n");
 | 
						|
        foreach my $pair ( sort keys %pairs ) {
 | 
						|
            push (@output, $pair);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    write_file($filename, {}, @output);
 | 
						|
} # 1}}}
 | 
						|
sub exclude_dir_validates {                  # {{{1
 | 
						|
    my ($rh_Exclude_Dir) = @_;
 | 
						|
    my $is_OK = 1;
 | 
						|
    foreach my $dir (keys %{$rh_Exclude_Dir}) {
 | 
						|
        if (($ON_WINDOWS and $dir =~ m{\\}) or ($dir =~ m{/})) {
 | 
						|
            $is_OK = 0;
 | 
						|
            warn "--exclude-dir '$dir' :  cannot specify directory paths\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if (!$is_OK) {
 | 
						|
        warn "Use '--fullpath --not-match-d=REGEX' instead\n";
 | 
						|
    }
 | 
						|
    return $is_OK;
 | 
						|
} # 1}}}
 | 
						|
sub process_exclude_list_file {              # {{{1
 | 
						|
    my ($list_file      , # in
 | 
						|
        $rh_exclude_dir , # out
 | 
						|
        $rh_ignored     , # out
 | 
						|
       ) = @_;
 | 
						|
    # note: references global @file_list
 | 
						|
    print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
 | 
						|
    # reject a specific set of files and/or directories
 | 
						|
    my @reject_list   = ($list_file); # don't count the exclude list file itself
 | 
						|
    push @reject_list, read_list_file($list_file);
 | 
						|
    my @file_reject_list = ();
 | 
						|
    foreach my $F_or_D (@reject_list) {
 | 
						|
        if (is_dir($F_or_D)) {
 | 
						|
            $rh_exclude_dir->{$F_or_D} = 1;
 | 
						|
        } elsif (is_file($F_or_D)) {
 | 
						|
            push @file_reject_list, $F_or_D;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Normalize file names for better comparison.
 | 
						|
    my %normalized_input   = normalize_file_names(@file_list);
 | 
						|
    my %normalized_reject  = normalize_file_names(@file_reject_list);
 | 
						|
    my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
 | 
						|
    foreach my $F (keys %normalized_input) {
 | 
						|
        if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
 | 
						|
            my $orig_F = $normalized_input{$F};
 | 
						|
            $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
 | 
						|
            print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
 | 
						|
                if $opt_v > 1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- process_exclude_list_file\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub combine_results {                        # {{{1
 | 
						|
    # returns 1 if the inputs are categorized by language
 | 
						|
    #         0 if no identifiable language was found
 | 
						|
    my ($ra_report_files, # in
 | 
						|
        $report_type    , # in  "by language" or "by report file"
 | 
						|
        $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
 | 
						|
        $rhaa_Filters_by_Language , # in
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
 | 
						|
    my $found_language = 0;
 | 
						|
 | 
						|
    foreach my $file (@{$ra_report_files}) {
 | 
						|
        my $n_results_found = 0;
 | 
						|
        my $IN = open_file('<', $file, 1);
 | 
						|
        if (!defined $IN) {
 | 
						|
            warn "Unable to read $file; ignoring.\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        while (<$IN>) {
 | 
						|
            next if /^(http|Language|SUM|-----)/;
 | 
						|
            if (!$opt_by_file  and
 | 
						|
                m{^(.*?)\s+         # language
 | 
						|
                   (\d+)\s+         # files
 | 
						|
                   (\d+)\s+         # blank
 | 
						|
                   (\d+)\s+         # comments
 | 
						|
                   (\d+)\s+         # code
 | 
						|
                   (                #    next four entries missing with -no3
 | 
						|
                   x\s+             # x
 | 
						|
                   \d+\.\d+\s+      # scale
 | 
						|
                   =\s+             # =
 | 
						|
                   (\d+\.\d+)\s*    # scaled code
 | 
						|
                   )?
 | 
						|
                   $}x) {
 | 
						|
                if ($report_type eq "by language") {
 | 
						|
                    if (!defined $rhaa_Filters_by_Language->{$1}) {
 | 
						|
                        warn "Unrecognized language '$1' in $file ignored\n";
 | 
						|
                        next;
 | 
						|
                    }
 | 
						|
                    # above test necessary to avoid trying to sum reports
 | 
						|
                    # of reports (which have no language breakdown).
 | 
						|
                    $found_language = 1;
 | 
						|
                    $rhh_count->{$1   }{'nFiles' } += $2;
 | 
						|
                    $rhh_count->{$1   }{'blank'  } += $3;
 | 
						|
                    $rhh_count->{$1   }{'comment'} += $4;
 | 
						|
                    $rhh_count->{$1   }{'code'   } += $5;
 | 
						|
                    $rhh_count->{$1   }{'scaled' } += $7 if $opt_3;
 | 
						|
                } else {
 | 
						|
                    $rhh_count->{$file}{'nFiles' } += $2;
 | 
						|
                    $rhh_count->{$file}{'blank'  } += $3;
 | 
						|
                    $rhh_count->{$file}{'comment'} += $4;
 | 
						|
                    $rhh_count->{$file}{'code'   } += $5;
 | 
						|
                    $rhh_count->{$file}{'scaled' } += $7 if $opt_3;
 | 
						|
                }
 | 
						|
                ++$n_results_found;
 | 
						|
            } elsif ($opt_by_file  and
 | 
						|
                m{^(.*?)\s+         # language
 | 
						|
                   (\d+)\s+         # blank
 | 
						|
                   (\d+)\s+         # comments
 | 
						|
                   (\d+)\s+         # code
 | 
						|
                   (                #    next four entries missing with -no3
 | 
						|
                   x\s+             # x
 | 
						|
                   \d+\.\d+\s+      # scale
 | 
						|
                   =\s+             # =
 | 
						|
                   (\d+\.\d+)\s*    # scaled code
 | 
						|
                   )?
 | 
						|
                   $}x) {
 | 
						|
                if ($report_type eq "by language") {
 | 
						|
                    next unless %{$rhaa_Filters_by_Language->{$1}};
 | 
						|
                    # above test necessary to avoid trying to sum reports
 | 
						|
                    # of reports (which have no language breakdown).
 | 
						|
                    $found_language = 1;
 | 
						|
                    $rhh_count->{$1   }{'nFiles' } +=  1;
 | 
						|
                    $rhh_count->{$1   }{'blank'  } += $2;
 | 
						|
                    $rhh_count->{$1   }{'comment'} += $3;
 | 
						|
                    $rhh_count->{$1   }{'code'   } += $4;
 | 
						|
                    $rhh_count->{$1   }{'scaled' } += $6 if $opt_3;
 | 
						|
                } else {
 | 
						|
                    $rhh_count->{$file}{'nFiles' } +=  1;
 | 
						|
                    $rhh_count->{$file}{'blank'  } += $2;
 | 
						|
                    $rhh_count->{$file}{'comment'} += $3;
 | 
						|
                    $rhh_count->{$file}{'code'   } += $4;
 | 
						|
                    $rhh_count->{$file}{'scaled' } += $6 if $opt_3;
 | 
						|
                }
 | 
						|
                ++$n_results_found;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        warn "No counts found in $file--is the file format correct?\n"
 | 
						|
            unless $n_results_found;
 | 
						|
    }
 | 
						|
    print "<- combine_results\n" if $opt_v > 2;
 | 
						|
    return $found_language;
 | 
						|
} # 1}}}
 | 
						|
sub compute_denominator {                    # {{{1
 | 
						|
    my ($method, $nCode, $nComment, $nBlank, ) = @_;
 | 
						|
    print "-> compute_denominator\n" if $opt_v > 2;
 | 
						|
    my %den        = ( "c" => $nCode );
 | 
						|
       $den{"cm"}  = $den{"c"}  + $nComment;
 | 
						|
       $den{"cmb"} = $den{"cm"} + $nBlank;
 | 
						|
       $den{"cb"}  = $den{"c"}  + $nBlank;
 | 
						|
 | 
						|
    print "<- compute_denominator\n" if $opt_v > 2;
 | 
						|
    return $den{ $method };
 | 
						|
} # 1}}}
 | 
						|
sub yaml_to_json_separators {                # {{{1
 | 
						|
    # YAML and JSON are closely related.  Their differences can be captured
 | 
						|
    # by trailing commas ($C), braces ($open_B, $close_B), and
 | 
						|
    # quotes around text ($Q).
 | 
						|
    print "-> yaml_to_json_separators()\n" if $opt_v > 2;
 | 
						|
    my ($Q, $open_B, $close_B, $start, $C);
 | 
						|
    if ($opt_json) {
 | 
						|
       $C       = ',';
 | 
						|
       $Q       = '"';
 | 
						|
       $open_B  = '{';
 | 
						|
       $close_B = '}';
 | 
						|
       $start   = '{';
 | 
						|
    } else {
 | 
						|
       $C       = '';
 | 
						|
       $Q       = '' ;
 | 
						|
       $open_B  = '' ;
 | 
						|
       $close_B = '';
 | 
						|
       $start   = "---\n# $URL\n";
 | 
						|
    }
 | 
						|
    print "<- yaml_to_json_separators()\n" if $opt_v > 2;
 | 
						|
    return ($Q, $open_B, $close_B, $start, $C);
 | 
						|
} # 1}}}
 | 
						|
sub diff_report     {                        # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    print "-> diff_report\n" if $opt_v > 2;
 | 
						|
 | 
						|
    if ($opt_xml) {
 | 
						|
        print "<- diff_report\n" if $opt_v > 2;
 | 
						|
        return diff_xml_report(@_)
 | 
						|
    } elsif ($opt_yaml) {
 | 
						|
        print "<- diff_report\n" if $opt_v > 2;
 | 
						|
        return diff_yaml_report(@_)
 | 
						|
    } elsif ($opt_json) {
 | 
						|
        print "<- diff_report\n" if $opt_v > 2;
 | 
						|
        return diff_json_report(@_)
 | 
						|
    } elsif ($opt_csv or $opt_md) {
 | 
						|
        print "<- diff_report\n" if $opt_v > 2;
 | 
						|
        return diff_csv_report(@_)
 | 
						|
    }
 | 
						|
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
    my %orig_case = ();
 | 
						|
    if ($ON_WINDOWS and $report_type eq "by file") {
 | 
						|
        # restore the original upper/lowercase version of the file name
 | 
						|
        foreach my $lc_file (sort keys %{$rhhh_count}) {
 | 
						|
          foreach my $cat (sort keys %{$rhhh_count->{$lc_file}}) {
 | 
						|
            foreach my $S (qw(added same modified removed)) {
 | 
						|
                $orig_case{ $upper_lower_map{$lc_file} }{$cat}{$S} =
 | 
						|
                           $rhhh_count->{$lc_file}{$cat}{$S};
 | 
						|
            }
 | 
						|
          }
 | 
						|
        }
 | 
						|
        $rhhh_count = \%orig_case;
 | 
						|
    }
 | 
						|
 | 
						|
#use Data::Dumper;
 | 
						|
#print "diff_report: ", Dumper($rhhh_count), "\n";
 | 
						|
    my @results       = ();
 | 
						|
 | 
						|
    my $languages     = ();
 | 
						|
    my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
 | 
						|
    my $max_len       = 0;
 | 
						|
    foreach my $language (keys %{$rhhh_count}) {
 | 
						|
        foreach my $V (qw(nFiles blank comment code)) {
 | 
						|
            foreach my $S (qw(added same modified removed)) {
 | 
						|
                $rhhh_count->{$language}{$V}{$S} = 0 unless
 | 
						|
                    defined $rhhh_count->{$language}{$V}{$S};
 | 
						|
                $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $max_len      = length($language) if length($language) > $max_len;
 | 
						|
    }
 | 
						|
    my $column_1_offset = 0;
 | 
						|
       $column_1_offset = $max_len - 17 if $max_len > 17;
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
 | 
						|
    my $spacing_0 = 23;
 | 
						|
    my $spacing_1 = 13;
 | 
						|
    my $spacing_2 =  9;
 | 
						|
    my $spacing_3 = 17;
 | 
						|
    if (!$opt_3) {
 | 
						|
        $spacing_1 = 19;
 | 
						|
        $spacing_2 = 14;
 | 
						|
        $spacing_3 = 27;
 | 
						|
    }
 | 
						|
    $spacing_0 += $column_1_offset;
 | 
						|
    $spacing_1 += $column_1_offset;
 | 
						|
    $spacing_3 += $column_1_offset;
 | 
						|
    my %Format = (
 | 
						|
        '1' => { 'xml' => 'name="%s" ',
 | 
						|
                 'txt' => "\%-${spacing_0}s ",
 | 
						|
               },
 | 
						|
        '2' => { 'xml' => 'name="%s" ',
 | 
						|
                 'txt' => "\%-${spacing_3}s ",
 | 
						|
               },
 | 
						|
        '3' => { 'xml' => 'files_count="%d" ',
 | 
						|
                 'txt' => '%6d ',
 | 
						|
               },
 | 
						|
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 | 
						|
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 | 
						|
               },
 | 
						|
        '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
 | 
						|
                 'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
 | 
						|
               },
 | 
						|
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 | 
						|
                 'txt' => ' x %6.2f = %14.2f',
 | 
						|
               },
 | 
						|
    );
 | 
						|
    my $Style = "txt";
 | 
						|
       $Style = "xml" if $opt_xml ;
 | 
						|
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 | 
						|
       $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 | 
						|
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 | 
						|
 | 
						|
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 | 
						|
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 | 
						|
            if (!$opt_3) and (68 + $column_1_offset) > 79;
 | 
						|
    my $data_line  = "";
 | 
						|
    my $first_column;
 | 
						|
    my $BY_LANGUAGE = 0;
 | 
						|
    my $BY_FILE     = 0;
 | 
						|
    if      ($report_type eq "by language") {
 | 
						|
        $first_column = "Language";
 | 
						|
        $BY_LANGUAGE  = 1;
 | 
						|
    } elsif ($report_type eq "by file")     {
 | 
						|
        $first_column = "File";
 | 
						|
        $BY_FILE      = 1;
 | 
						|
    } else {
 | 
						|
        $first_column = "Report File";
 | 
						|
    }
 | 
						|
 | 
						|
    # column headers
 | 
						|
    if (!$opt_3 and $BY_FILE) {
 | 
						|
        my $spacing_n = $spacing_1 - 11;
 | 
						|
        $data_line  = sprintf "%-${spacing_n}s" , $first_column;
 | 
						|
    } else {
 | 
						|
        $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 | 
						|
    }
 | 
						|
    if ($BY_FILE) {
 | 
						|
        $data_line .= sprintf "%${spacing_2}s"   , ""     ;
 | 
						|
    } else {
 | 
						|
        $data_line .= sprintf "%${spacing_2}s "  , "files";
 | 
						|
    }
 | 
						|
    my $PCT_symbol = "";
 | 
						|
       $PCT_symbol = " \%" if $opt_by_percent;
 | 
						|
    $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | 
						|
        "blank${PCT_symbol}"         ,
 | 
						|
        "comment${PCT_symbol}"       ,
 | 
						|
        "code";
 | 
						|
 | 
						|
    if ($Style eq "txt") {
 | 
						|
        push @results, $data_line;
 | 
						|
        push @results, $hyphen_line;
 | 
						|
    }
 | 
						|
 | 
						|
    # sort diff output in descending order of cumulative entries
 | 
						|
    foreach my $lang_or_file (sort {
 | 
						|
                                ($rhhh_count->{$b}{'code'}{'added'}    +
 | 
						|
                                 $rhhh_count->{$b}{'code'}{'same'}     +
 | 
						|
                                 $rhhh_count->{$b}{'code'}{'modified'} +
 | 
						|
                                 $rhhh_count->{$b}{'code'}{'removed'}  )  <=>
 | 
						|
                                ($rhhh_count->{$a}{'code'}{'added'}    +
 | 
						|
                                 $rhhh_count->{$a}{'code'}{'same'}     +
 | 
						|
                                 $rhhh_count->{$a}{'code'}{'modified'} +
 | 
						|
                                 $rhhh_count->{$a}{'code'}{'removed'})
 | 
						|
                              or $a cmp $b }
 | 
						|
                                    keys %{$rhhh_count}) {
 | 
						|
 | 
						|
        if ($BY_FILE) {
 | 
						|
            push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | 
						|
        } else {
 | 
						|
            push @results, $lang_or_file;
 | 
						|
        }
 | 
						|
        foreach my $S (qw(same modified added removed)) {
 | 
						|
            my $indent = $spacing_1 - 2;
 | 
						|
            my $line .= sprintf " %-${indent}s", $S;
 | 
						|
            if ($BY_FILE) {
 | 
						|
                $line .= sprintf "   ";
 | 
						|
            } else {
 | 
						|
                $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 | 
						|
            }
 | 
						|
            if ($opt_by_percent) {
 | 
						|
                my $DEN = compute_denominator($opt_by_percent  ,
 | 
						|
                    $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 | 
						|
                    $rhhh_count->{$lang_or_file}{'comment'}{$S},
 | 
						|
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 | 
						|
                if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 | 
						|
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | 
						|
                        $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
 | 
						|
                        $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
 | 
						|
                        $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | 
						|
                } else {
 | 
						|
                    $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | 
						|
                        0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | 
						|
                    $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
 | 
						|
                    $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
 | 
						|
                    $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | 
						|
            }
 | 
						|
            push @results, $line;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    push @results, $hyphen_line;
 | 
						|
    push @results, "SUM:";
 | 
						|
    my $sum_files    = 0;
 | 
						|
    my $sum_lines    = 0;
 | 
						|
    foreach my $S (qw(same modified added removed)) {
 | 
						|
        my $indent = $spacing_1 - 2;
 | 
						|
        my $line .= sprintf " %-${indent}s", $S;
 | 
						|
            if ($BY_FILE) {
 | 
						|
                $line .= sprintf "   ";
 | 
						|
                $sum_files += 1;
 | 
						|
            } else {
 | 
						|
                $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
 | 
						|
                $sum_files += $sum{'nFiles'}{$S};
 | 
						|
            }
 | 
						|
        if ($opt_by_percent) {
 | 
						|
            my $DEN = compute_denominator($opt_by_percent,
 | 
						|
                $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
 | 
						|
            if ($sum{'code'}{$S} > 0) {
 | 
						|
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | 
						|
                    $sum{'blank'}{$S}   / $DEN * 100,
 | 
						|
                    $sum{'comment'}{$S} / $DEN * 100,
 | 
						|
                    $sum{'code'}{$S}    ;
 | 
						|
            } else {
 | 
						|
                $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | 
						|
                    0.0, 0.0, $sum{'code'}{$S}    ;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | 
						|
                $sum{'blank'}{$S}   ,
 | 
						|
                $sum{'comment'}{$S} ,
 | 
						|
                $sum{'code'}{$S}    ;
 | 
						|
        }
 | 
						|
        $sum_lines += $sum{'blank'}{$S} + $sum{'comment'}{$S} + $sum{'code'}{$S};
 | 
						|
        push @results, $line;
 | 
						|
    }
 | 
						|
 | 
						|
    my $header_line  = sprintf "%s v %s", $URL, $version;
 | 
						|
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 | 
						|
                        $elapsed_sec           ,
 | 
						|
                        $sum_files/$elapsed_sec,
 | 
						|
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 | 
						|
    if ($Style eq "txt") {
 | 
						|
        unshift @results, output_header($header_line, $hyphen_line, $BY_FILE);
 | 
						|
    }
 | 
						|
 | 
						|
    push @results, $hyphen_line;
 | 
						|
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | 
						|
    print "<- diff_report\n" if $opt_v > 2;
 | 
						|
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub xml_yaml_or_json_header {                # {{{1
 | 
						|
    my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
 | 
						|
    print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
 | 
						|
    my $header      = "";
 | 
						|
    my $file_rate   = $sum_files/$elapsed_sec;
 | 
						|
    my $line_rate   = $sum_lines/$elapsed_sec;
 | 
						|
    my $type        = "";
 | 
						|
       $type        = "diff_" if $opt_diff;
 | 
						|
    my $report_file = "";
 | 
						|
    if ($opt_report_file) {
 | 
						|
        my $Fname = $opt_report_file;
 | 
						|
        $Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
 | 
						|
        if ($opt_sum_reports) {
 | 
						|
            if ($by_file) {
 | 
						|
                $report_file = "  <report_file>$Fname.file</report_file>"
 | 
						|
            } else {
 | 
						|
                $report_file = "  <report_file>$Fname.lang</report_file>"
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $report_file = "  <report_file>$Fname</report_file>"
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($opt_xml) {
 | 
						|
        $header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 | 
						|
        $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
 | 
						|
        if ($opt_hide_rate) {
 | 
						|
            $header .= "<${type}results>
 | 
						|
<header>
 | 
						|
  <cloc_url>$URL</cloc_url>
 | 
						|
  <cloc_version>$version</cloc_version>
 | 
						|
  <n_files>$sum_files</n_files>
 | 
						|
  <n_lines>$sum_lines</n_lines>";
 | 
						|
        } else {
 | 
						|
            $header .= "<${type}results>
 | 
						|
<header>
 | 
						|
  <cloc_url>$URL</cloc_url>
 | 
						|
  <cloc_version>$version</cloc_version>
 | 
						|
  <elapsed_seconds>$elapsed_sec</elapsed_seconds>
 | 
						|
  <n_files>$sum_files</n_files>
 | 
						|
  <n_lines>$sum_lines</n_lines>
 | 
						|
  <files_per_second>$file_rate</files_per_second>
 | 
						|
  <lines_per_second>$line_rate</lines_per_second>";
 | 
						|
        }
 | 
						|
        $header .= "\n$report_file"
 | 
						|
            if $opt_report_file;
 | 
						|
        $header .= "\n</header>";
 | 
						|
        if (%git_metadata) {
 | 
						|
            foreach my $target (keys %git_metadata) {
 | 
						|
                $header .= "\n<source>";
 | 
						|
                $header .= "\n  <target>$target</target>";
 | 
						|
                $header .= "\n  <origin>$git_metadata{$target}{'origin'}</origin>";
 | 
						|
                $header .= "\n  <branch>$git_metadata{$target}{'branch'}</branch>";
 | 
						|
                $header .= "\n  <commit>$git_metadata{$target}{'commit'}</commit>";
 | 
						|
                $header .= "\n</source>";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } elsif ($opt_yaml or $opt_json) {
 | 
						|
        my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | 
						|
        if ($opt_hide_rate) {
 | 
						|
            $header = "${start}${Q}header${Q} : $open_B
 | 
						|
  ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 | 
						|
  ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 | 
						|
  ${Q}n_files${Q}            : $sum_files${C}
 | 
						|
  ${Q}n_lines${Q}            : $sum_lines";
 | 
						|
        } else {
 | 
						|
            $header = "${start}${Q}header${Q} : $open_B
 | 
						|
  ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 | 
						|
  ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 | 
						|
  ${Q}elapsed_seconds${Q}    : $elapsed_sec${C}
 | 
						|
  ${Q}n_files${Q}            : $sum_files${C}
 | 
						|
  ${Q}n_lines${Q}            : $sum_lines${C}
 | 
						|
  ${Q}files_per_second${Q}   : $file_rate${C}
 | 
						|
  ${Q}lines_per_second${Q}   : $line_rate";
 | 
						|
        }
 | 
						|
        if ($opt_report_file) {
 | 
						|
            my $Fname = $opt_report_file;
 | 
						|
            $Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
 | 
						|
            if ($opt_sum_reports) {
 | 
						|
                if ($by_file) {
 | 
						|
                    $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname.file${Q}"
 | 
						|
                } else {
 | 
						|
                    $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname.lang${Q}"
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname${Q}";
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $header .= "${close_B}${C}";
 | 
						|
    }
 | 
						|
    print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
 | 
						|
    return $header;
 | 
						|
} # 1}}}
 | 
						|
sub diff_yaml_report {                       # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
    print "-> diff_yaml_report\n" if $opt_v > 2;
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
    my @results       = ();
 | 
						|
    my %sum           = ();
 | 
						|
    my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | 
						|
        diff_header_sum($report_type, $rhhh_count, \%sum);
 | 
						|
 | 
						|
    if (!$ALREADY_SHOWED_HEADER) {
 | 
						|
        push @results,
 | 
						|
              xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | 
						|
                                 $sum_files, $sum_lines, $BY_FILE);
 | 
						|
        $ALREADY_SHOWED_HEADER = 1;
 | 
						|
    }
 | 
						|
    foreach my $S (qw(added same modified removed)) {
 | 
						|
        push @results, "$S :";
 | 
						|
        foreach my $F_or_L (keys %{$rhhh_count}) {
 | 
						|
            # force quoted language or filename in case these
 | 
						|
            # have embedded funny characters, issue #312
 | 
						|
            push @results, "  '" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "' :";
 | 
						|
            foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 | 
						|
                next if $k eq "lang"; # present only in those cases
 | 
						|
                                      # where code exists for action $S
 | 
						|
                $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 | 
						|
                    defined $rhhh_count->{$F_or_L}{$k}{$S};
 | 
						|
                push @results,
 | 
						|
                    "    $k : $rhhh_count->{$F_or_L}{$k}{$S}";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    push @results, "SUM :";
 | 
						|
    foreach my $S (qw(added same modified removed)) {
 | 
						|
        push @results, "  $S :";
 | 
						|
        foreach my $topic (keys %sum) {
 | 
						|
            push @results, "    $topic : $sum{$topic}{$S}";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- diff_yaml_report\n" if $opt_v > 2;
 | 
						|
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub diff_json_report {                       # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
    print "-> diff_json_report\n" if $opt_v > 2;
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
    my @results       = ();
 | 
						|
    my %sum           = ();
 | 
						|
    my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | 
						|
        diff_header_sum($report_type, $rhhh_count, \%sum);
 | 
						|
 | 
						|
    if (!$ALREADY_SHOWED_HEADER) {
 | 
						|
        push @results,
 | 
						|
              xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | 
						|
                                 $sum_files, $sum_lines, $BY_FILE);
 | 
						|
        $ALREADY_SHOWED_HEADER = 1;
 | 
						|
    }
 | 
						|
    foreach my $S (qw(added same modified removed)) {
 | 
						|
        push @results, " \"$S\" : {";
 | 
						|
        foreach my $F_or_L (keys %{$rhhh_count}) {
 | 
						|
            push @results, "  \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
 | 
						|
            foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 | 
						|
                next if $k eq "lang"; # present only in those cases
 | 
						|
                                      # where code exists for action $S
 | 
						|
                $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 | 
						|
                    defined $rhhh_count->{$F_or_L}{$k}{$S};
 | 
						|
                push @results,
 | 
						|
                    "    \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
 | 
						|
            }
 | 
						|
            $results[-1] =~ s/,\s*$//;
 | 
						|
            push @results, "  },"
 | 
						|
        }
 | 
						|
        $results[-1] =~ s/,\s*$//;
 | 
						|
        push @results, "  },"
 | 
						|
    }
 | 
						|
 | 
						|
    push @results, "  \"SUM\" : {";
 | 
						|
    foreach my $S (qw(added same modified removed)) {
 | 
						|
        push @results, "  \"$S\" : {";
 | 
						|
        foreach my $topic (keys %sum) {
 | 
						|
            push @results, "    \"$topic\" : $sum{$topic}{$S},";
 | 
						|
        }
 | 
						|
        $results[-1] =~ s/,\s*$//;
 | 
						|
        push @results, "},";
 | 
						|
    }
 | 
						|
 | 
						|
    $results[-1] =~ s/,\s*$//;
 | 
						|
    push @results, "} }";
 | 
						|
    print "<- diff_json_report\n" if $opt_v > 2;
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub diff_header_sum {                        # {{{1
 | 
						|
    my ($report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rhh_sum    , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    my $sum_files = 0;
 | 
						|
    my $sum_lines = 0;
 | 
						|
    foreach my $language (keys %{$rhhh_count}) {
 | 
						|
        foreach my $V (qw(nFiles blank comment code)) {
 | 
						|
            foreach my $S (qw(added same modified removed)) {
 | 
						|
                $rhhh_count->{$language}{$V}{$S} = 0 unless
 | 
						|
                    defined $rhhh_count->{$language}{$V}{$S};
 | 
						|
                $rhh_sum->{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 | 
						|
                if ($V eq "nFiles") {
 | 
						|
                    $sum_files += $rhhh_count->{$language}{$V}{$S};
 | 
						|
                } else {
 | 
						|
                    $sum_lines += $rhhh_count->{$language}{$V}{$S};
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my $BY_LANGUAGE = 0;
 | 
						|
    my $BY_FILE     = 0;
 | 
						|
    if      ($report_type eq "by language") {
 | 
						|
        $BY_LANGUAGE  = 1;
 | 
						|
    } elsif ($report_type eq "by file")     {
 | 
						|
        $BY_FILE      = 1;
 | 
						|
    }
 | 
						|
    return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
 | 
						|
} # 1}}}
 | 
						|
sub diff_xml_report {                        # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
    print "-> diff_xml_report\n" if $opt_v > 2;
 | 
						|
    my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | 
						|
 | 
						|
#print "diff_report: ", Dumper($rhhh_count), "\n";
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
    my @results       = ();
 | 
						|
    my %sum           = ();
 | 
						|
    my $languages     = ();
 | 
						|
 | 
						|
    my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | 
						|
        diff_header_sum($report_type, $rhhh_count, \%sum);
 | 
						|
 | 
						|
    my $data_line   = "";
 | 
						|
 | 
						|
    if (!$ALREADY_SHOWED_HEADER) {
 | 
						|
        push @results,
 | 
						|
              xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | 
						|
                                 $sum_files, $sum_lines, $BY_FILE);
 | 
						|
        $ALREADY_SHOWED_HEADER = 1;
 | 
						|
    }
 | 
						|
 | 
						|
    foreach my $S (qw(same modified added removed)) {
 | 
						|
        push @results, "  <$S>";
 | 
						|
        foreach my $lang_or_file (sort {
 | 
						|
                                     $rhhh_count->{$b}{'code'} <=>
 | 
						|
                                     $rhhh_count->{$a}{'code'}
 | 
						|
                                   }
 | 
						|
                              keys %{$rhhh_count}) {
 | 
						|
            my $L = "";
 | 
						|
 | 
						|
            if ($BY_FILE) {
 | 
						|
                $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ",
 | 
						|
                    xml_metachars(
 | 
						|
                        rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
 | 
						|
            } else {
 | 
						|
                $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
 | 
						|
                        $lang_or_file ,
 | 
						|
                        $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 | 
						|
            }
 | 
						|
            if ($opt_by_percent) {
 | 
						|
              my $DEN = compute_denominator($opt_by_percent            ,
 | 
						|
                            $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 | 
						|
                            $rhhh_count->{$lang_or_file}{'comment'}{$S},
 | 
						|
                            $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 | 
						|
              foreach my $T (qw(blank comment)) {
 | 
						|
                  if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 | 
						|
                    $L .= sprintf "%s=\"%.2f\" ",
 | 
						|
                            $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
 | 
						|
                  } else {
 | 
						|
                    $L .= sprintf "%s=\"0.0\" ", $T;
 | 
						|
                  }
 | 
						|
              }
 | 
						|
              foreach my $T (qw(code)) {
 | 
						|
                  $L .= sprintf "%s=\"%d\" ",
 | 
						|
                          $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 | 
						|
              }
 | 
						|
            } else {
 | 
						|
              foreach my $T (qw(blank comment code)) {
 | 
						|
                  $L .= sprintf "%s=\"%d\" ",
 | 
						|
                          $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 | 
						|
              }
 | 
						|
            }
 | 
						|
            push @results, $L . "/>";
 | 
						|
        }
 | 
						|
 | 
						|
 | 
						|
        my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          my $DEN = compute_denominator($opt_by_percent,
 | 
						|
                        $sum{'code'}{$S}   ,
 | 
						|
                        $sum{'comment'}{$S},
 | 
						|
                        $sum{'blank'}{$S}  );
 | 
						|
          foreach my $V (qw(blank comment)) {
 | 
						|
              if ($sum{'code'}{$S} > 0) {
 | 
						|
                  $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
 | 
						|
              } else {
 | 
						|
                  $L .= sprintf "%s=\"0.0\" ", $V;
 | 
						|
              }
 | 
						|
          }
 | 
						|
          foreach my $V (qw(code)) {
 | 
						|
              $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 | 
						|
          }
 | 
						|
        } else {
 | 
						|
          foreach my $V (qw(blank comment code)) {
 | 
						|
              $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 | 
						|
          }
 | 
						|
        }
 | 
						|
        push @results, $L . "/>";
 | 
						|
        push @results, "  </$S>";
 | 
						|
    }
 | 
						|
 | 
						|
    push @results, "</diff_results>";
 | 
						|
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | 
						|
    print "<- diff_xml_report\n" if $opt_v > 2;
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub diff_csv_report {                        # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | 
						|
        $rh_scale   , # in  unused
 | 
						|
       ) = @_;
 | 
						|
    print "-> diff_csv_report\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @results       = ();
 | 
						|
    my $languages     = ();
 | 
						|
 | 
						|
    my $data_line   = "";
 | 
						|
    my $BY_LANGUAGE = 0;
 | 
						|
    my $BY_FILE     = 0;
 | 
						|
    if      ($report_type eq "by language") {
 | 
						|
        $BY_LANGUAGE  = 1;
 | 
						|
    } elsif ($report_type eq "by file")     {
 | 
						|
        $BY_FILE      = 1;
 | 
						|
    }
 | 
						|
    my $DELIM = ",";
 | 
						|
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 | 
						|
       $DELIM = "|" if defined $opt_md;
 | 
						|
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
 | 
						|
    my $line = "Language${DELIM} ";
 | 
						|
       $line = "File${DELIM} " if $BY_FILE;
 | 
						|
    foreach my $item (qw(files blank comment code)) {
 | 
						|
        next if $BY_FILE and $item eq 'files';
 | 
						|
        foreach my $symbol ( '==', '!=', '+', '-', ) {
 | 
						|
            $line .= "$symbol $item${DELIM} ";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my $T_elapsed_sec = "T=$elapsed_sec s";
 | 
						|
       $T_elapsed_sec = "" if $opt_hide_rate;
 | 
						|
 | 
						|
    if ($opt_md) {
 | 
						|
        push @results, "cloc|$URL v $version $T_elapsed_sec";
 | 
						|
        push @results, "--- | ---";
 | 
						|
        push @results, "";
 | 
						|
        push @results, $line;
 | 
						|
        my @col_header  = ();
 | 
						|
        push @col_header, ":-------";
 | 
						|
        foreach (1..16) {
 | 
						|
            push @col_header, "-------:";
 | 
						|
        }
 | 
						|
        push @results, join("|", @col_header) . "|";
 | 
						|
    } else {
 | 
						|
        $line .= "\"$URL v $version $T_elapsed_sec\"";
 | 
						|
        push @results, $line;
 | 
						|
    }
 | 
						|
 | 
						|
    foreach my $lang_or_file (keys %{$rhhh_count}) {
 | 
						|
        $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
 | 
						|
            defined $rhhh_count->{$lang_or_file}{'code'};
 | 
						|
    }
 | 
						|
    foreach my $lang_or_file (sort {
 | 
						|
                                 $rhhh_count->{$b}{'code'} <=>
 | 
						|
                                 $rhhh_count->{$a}{'code'}
 | 
						|
                               }
 | 
						|
                          keys %{$rhhh_count}) {
 | 
						|
        if ($BY_FILE) {
 | 
						|
            $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
 | 
						|
        } else {
 | 
						|
            $line = $lang_or_file . "${DELIM} ";
 | 
						|
        }
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          foreach my $item (qw(nFiles)) {
 | 
						|
              next if $BY_FILE and $item eq 'nFiles';
 | 
						|
              foreach my $symbol (qw(same modified added removed)) {
 | 
						|
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | 
						|
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | 
						|
                  } else {
 | 
						|
                      $line .= "0${DELIM} ";
 | 
						|
                  }
 | 
						|
              }
 | 
						|
          }
 | 
						|
          foreach my $item (qw(blank comment)) {
 | 
						|
              foreach my $symbol (qw(same modified added removed)) {
 | 
						|
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
 | 
						|
                      defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
 | 
						|
                      $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
 | 
						|
                      $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
 | 
						|
                  } else {
 | 
						|
                      $line .= "0.00${DELIM} ";
 | 
						|
                  }
 | 
						|
              }
 | 
						|
          }
 | 
						|
          foreach my $item (qw(code)) {
 | 
						|
              foreach my $symbol (qw(same modified added removed)) {
 | 
						|
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | 
						|
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | 
						|
                  } else {
 | 
						|
                      $line .= "0${DELIM} ";
 | 
						|
                  }
 | 
						|
              }
 | 
						|
          }
 | 
						|
        } else {
 | 
						|
          foreach my $item (qw(nFiles blank comment code)) {
 | 
						|
              next if $BY_FILE and $item eq 'nFiles';
 | 
						|
              foreach my $symbol (qw(same modified added removed)) {
 | 
						|
                  if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | 
						|
                      $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | 
						|
                  } else {
 | 
						|
                      $line .= "0${DELIM} ";
 | 
						|
                  }
 | 
						|
              }
 | 
						|
          }
 | 
						|
        }
 | 
						|
        push @results, $line;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- diff_csv_report\n" if $opt_v > 2;
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub rm_leading_tempdir {                     # {{{1
 | 
						|
    my ($in_file, $rh_temp_dirs, ) = @_;
 | 
						|
    my $clean_filename = $in_file;
 | 
						|
    foreach my $temp_d (keys %{$rh_temp_dirs}) {
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
        # \ -> / necessary to allow the next if test's
 | 
						|
        # m{} to work in the presence of spaces in file names
 | 
						|
            $temp_d         =~ s{\\}{/}g;
 | 
						|
            $clean_filename =~ s{\\}{/}g;
 | 
						|
        }
 | 
						|
        if ($clean_filename =~ m{^$temp_d/}) {
 | 
						|
            $clean_filename =~ s{^$temp_d/}{};
 | 
						|
            last;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($ON_WINDOWS and $opt_by_file) { # then go back from / to \
 | 
						|
        if ($opt_json) {
 | 
						|
            $clean_filename =~ s{/}{\\\\}g;
 | 
						|
        } else {
 | 
						|
            $clean_filename =~ s{/}{\\}g;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return $clean_filename;
 | 
						|
} # 1}}}
 | 
						|
sub generate_sql    {                        # {{{1
 | 
						|
    my ($elapsed_sec, # in
 | 
						|
        $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
    print "-> generate_sql\n" if $opt_v > 2;
 | 
						|
 | 
						|
#print "generate_sql A [$opt_sql_project]\n";
 | 
						|
    $opt_sql_project = cwd() unless defined $opt_sql_project;
 | 
						|
    $opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
 | 
						|
#print "generate_sql B [$opt_sql_project]\n";
 | 
						|
    $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
 | 
						|
#print "generate_sql C [$opt_sql_project]\n";
 | 
						|
 | 
						|
    my $schema = undef;
 | 
						|
    if ($opt_sql_style eq "oracle") {
 | 
						|
        $schema = "
 | 
						|
CREATE TABLE metadata
 | 
						|
(
 | 
						|
  id          INTEGER PRIMARY KEY,
 | 
						|
  timestamp   TIMESTAMP,
 | 
						|
  project     VARCHAR2(500 CHAR),
 | 
						|
  elapsed_s   NUMBER(10, 6)
 | 
						|
)
 | 
						|
/
 | 
						|
 | 
						|
CREATE TABLE t
 | 
						|
(
 | 
						|
  id             INTEGER           ,
 | 
						|
  project        VARCHAR2(500 CHAR),
 | 
						|
  language       VARCHAR2(500 CHAR),
 | 
						|
  file_fullname  VARCHAR2(500 CHAR),
 | 
						|
  file_dirname   VARCHAR2(500 CHAR),
 | 
						|
  file_basename  VARCHAR2(500 CHAR),
 | 
						|
  nblank         INTEGER,
 | 
						|
  ncomment       INTEGER,
 | 
						|
  ncode          INTEGER,
 | 
						|
  nscaled        NUMBER(10, 6),
 | 
						|
FOREIGN KEY (id)
 | 
						|
    REFERENCES metadata (id)
 | 
						|
)
 | 
						|
/
 | 
						|
 | 
						|
";
 | 
						|
    } else {
 | 
						|
        $schema = "
 | 
						|
create table metadata (          -- $URL v $VERSION
 | 
						|
                id        integer primary key,
 | 
						|
                timestamp varchar(500),
 | 
						|
                Project   varchar(500),
 | 
						|
                elapsed_s real);
 | 
						|
create table t        (
 | 
						|
                id            integer        ,
 | 
						|
                Project       varchar(500)   ,
 | 
						|
                Language      varchar(500)   ,
 | 
						|
                File          varchar(500)   ,
 | 
						|
                File_dirname  varchar(500)   ,
 | 
						|
                File_basename varchar(500)   ,
 | 
						|
                nBlank        integer        ,
 | 
						|
                nComment      integer        ,
 | 
						|
                nCode         integer        ,
 | 
						|
                nScaled       real           ,
 | 
						|
        foreign key (id)
 | 
						|
            references metadata (id));
 | 
						|
";
 | 
						|
    }
 | 
						|
    $opt_sql = "-" if $opt_sql eq "1";
 | 
						|
 | 
						|
    my $open_mode = ">";
 | 
						|
       $open_mode = ">>" if $opt_sql_append;
 | 
						|
 | 
						|
    my $fh;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path and $opt_sql ne "-") {
 | 
						|
        # only use the Win32::LongPath wrapper here when needed,
 | 
						|
        # and only when not writing to STDOUT.
 | 
						|
        $fh = open_file($open_mode, $opt_sql, 1);
 | 
						|
        die "Unable to write to $opt_sql\n" if !defined $fh;
 | 
						|
    } else {
 | 
						|
        $fh = new IO::File; # $opt_sql, "w";
 | 
						|
        if (!$fh->open("${open_mode}${opt_sql}")) {
 | 
						|
            die "Unable to write to $opt_sql  $!\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print $fh $schema unless defined $opt_sql_append;
 | 
						|
 | 
						|
    my $id = int(time());
 | 
						|
    my $insert_into_t = "insert into t ";
 | 
						|
    if ($opt_sql_style eq "oracle") {
 | 
						|
        printf $fh "insert into metadata values(%d, TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
 | 
						|
                    $id,
 | 
						|
                    strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 | 
						|
                    $opt_sql_project, $elapsed_sec;
 | 
						|
    } elsif ($opt_sql_style eq "named_columns") {
 | 
						|
        print $fh "begin transaction;\n";
 | 
						|
        $insert_into_t .= "(id, Project, Language, File, File_dirname, File_basename, nBlank, nComment, nCode, nScaled )";
 | 
						|
    } else {
 | 
						|
        print $fh "begin transaction;\n";
 | 
						|
        printf $fh "insert into metadata values(%d, '%s', '%s', %f);\n",
 | 
						|
                    $id, strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 | 
						|
                    $opt_sql_project, $elapsed_sec;
 | 
						|
    }
 | 
						|
 | 
						|
    my $nIns = 0;
 | 
						|
    foreach my $file (keys %{$rhh_count}) {
 | 
						|
        my $language = $rhh_count->{$file}{'lang'};
 | 
						|
        my $clean_filename = $file;
 | 
						|
        # If necessary (that is, if the input contained an
 | 
						|
        # archive file [.tar.gz, etc]), strip the temporary
 | 
						|
        # directory name which was used to expand the archive
 | 
						|
        # from the file name.
 | 
						|
 | 
						|
        $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
 | 
						|
        $clean_filename =~ s/\'/''/g;  # double embedded single quotes
 | 
						|
                                       # to escape them
 | 
						|
 | 
						|
        printf $fh "$insert_into_t values(%d, '%s', '%s', '%s', '%s', '%s', " .
 | 
						|
                   "%d, %d, %d, %f);\n",
 | 
						|
                    $id                        ,
 | 
						|
                    $opt_sql_project           ,
 | 
						|
                    $language                  ,
 | 
						|
                    $clean_filename            ,
 | 
						|
                    dirname( $clean_filename)  ,
 | 
						|
                    basename($clean_filename)  ,
 | 
						|
                    $rhh_count->{$file}{'blank'},
 | 
						|
                    $rhh_count->{$file}{'comment'},
 | 
						|
                    $rhh_count->{$file}{'code'}   ,
 | 
						|
                    $rhh_count->{$file}{'code'}*$rh_scale->{$language};
 | 
						|
 | 
						|
        ++$nIns;
 | 
						|
        if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
 | 
						|
            print $fh "commit;\n";
 | 
						|
            print $fh "begin transaction;\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($opt_sql_style ne "oracle") {
 | 
						|
        print $fh "commit;\n";
 | 
						|
    }
 | 
						|
 | 
						|
    $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
 | 
						|
    print "<- generate_sql\n" if $opt_v > 2;
 | 
						|
 | 
						|
    # sample query:
 | 
						|
    #
 | 
						|
    #   select project, language,
 | 
						|
    #          sum(nCode)     as Code,
 | 
						|
    #          sum(nComment)  as Comments,
 | 
						|
    #          sum(nBlank)    as Blank,
 | 
						|
    #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
 | 
						|
    #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
 | 
						|
    #          from t group by Project, Language order by Project, Code desc;
 | 
						|
    #
 | 
						|
} # 1}}}
 | 
						|
sub output_header   {                        # {{{1
 | 
						|
    my ($header_line,
 | 
						|
        $hyphen_line,
 | 
						|
        $BY_FILE    ,)    = @_;
 | 
						|
    print "-> output_header\n" if $opt_v > 2;
 | 
						|
    my @R = ();
 | 
						|
    if      ($opt_xml) {
 | 
						|
        if (!$ALREADY_SHOWED_XML_SECTION) {
 | 
						|
            push @R, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 | 
						|
            push @R, '<?xml-stylesheet type="text/xsl" href="' .
 | 
						|
                            $opt_xsl . '"?>' if $opt_xsl;
 | 
						|
            push @R, "<results>";
 | 
						|
            push @R, "<header>$header_line</header>";
 | 
						|
            $ALREADY_SHOWED_XML_SECTION = 1;
 | 
						|
        }
 | 
						|
        if ($BY_FILE) {
 | 
						|
            push @R, "<files>";
 | 
						|
        } else {
 | 
						|
            push @R, "<languages>";
 | 
						|
        }
 | 
						|
    } elsif ($opt_yaml) {
 | 
						|
        push @R, "---\n# $header_line";
 | 
						|
    } elsif ($opt_csv or $opt_md) {
 | 
						|
        # append the header to the end of the column headers
 | 
						|
        # to keep the output a bit cleaner from a spreadsheet
 | 
						|
        # perspective
 | 
						|
    } else {
 | 
						|
        if ($ALREADY_SHOWED_HEADER) {
 | 
						|
            push @R, "";
 | 
						|
        } else {
 | 
						|
            push @R, $header_line;
 | 
						|
            $ALREADY_SHOWED_HEADER = 1;
 | 
						|
        }
 | 
						|
        push @R, $hyphen_line;
 | 
						|
    }
 | 
						|
    print "<- output_header\n" if $opt_v > 2;
 | 
						|
    return @R;
 | 
						|
} # 1}}}
 | 
						|
sub generate_report {                        # {{{1
 | 
						|
    # returns an array of lines containing the results
 | 
						|
    my ($version    , # in
 | 
						|
        $elapsed_sec, # in
 | 
						|
        $report_type, # in  "by language" | "by report file" | "by file"
 | 
						|
        $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
 | 
						|
                      #       where TYPE = name of language, source file,
 | 
						|
                      #                    or report file
 | 
						|
        $rh_scale   , # in
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    my %orig_case = ();
 | 
						|
    if ($ON_WINDOWS and $report_type eq "by file") {
 | 
						|
        # restore the original upper/lowercase version of the file name
 | 
						|
        foreach my $lc_file (sort keys %{$rhh_count}) {
 | 
						|
            foreach my $cat (sort keys %{$rhh_count->{$lc_file}}) {
 | 
						|
                $orig_case{ $upper_lower_map{$lc_file} }{$cat} =
 | 
						|
                           $rhh_count->{$lc_file}{$cat};
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $rhh_count = \%orig_case;
 | 
						|
    }
 | 
						|
    print "-> generate_report\n" if $opt_v > 2;
 | 
						|
    my $DELIM = ",";
 | 
						|
       $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 | 
						|
       $DELIM = "|" if defined $opt_md;
 | 
						|
 | 
						|
    my @results       = ();
 | 
						|
 | 
						|
    my $languages     = ();
 | 
						|
 | 
						|
    my $sum_files     = 0;
 | 
						|
    my $sum_code      = 0;
 | 
						|
    my $sum_blank     = 0;
 | 
						|
    my $sum_comment   = 0;
 | 
						|
    my $max_len       = 0;
 | 
						|
    foreach my $language (keys %{$rhh_count}) {
 | 
						|
        $sum_files   += $rhh_count->{$language}{'nFiles'} ;
 | 
						|
        $sum_blank   += $rhh_count->{$language}{'blank'}  ;
 | 
						|
        $sum_comment += $rhh_count->{$language}{'comment'};
 | 
						|
        $sum_code    += $rhh_count->{$language}{'code'}   ;
 | 
						|
        $max_len      = length($language) if length($language) > $max_len;
 | 
						|
    }
 | 
						|
    my $column_1_offset = 0;
 | 
						|
       $column_1_offset = $max_len - 17 if $max_len > 17;
 | 
						|
    my $sum_lines = $sum_blank + $sum_comment + $sum_code;
 | 
						|
    $elapsed_sec = 0.5 unless $elapsed_sec;
 | 
						|
 | 
						|
    my $spacing_0 = 23;
 | 
						|
    my $spacing_1 = 13;
 | 
						|
    my $spacing_2 =  9;
 | 
						|
    my $spacing_3 = 17;
 | 
						|
    if (!$opt_3) {
 | 
						|
        $spacing_1 = 19;
 | 
						|
        $spacing_2 = 14;
 | 
						|
        $spacing_3 = 27;
 | 
						|
    }
 | 
						|
    $spacing_0 += $column_1_offset;
 | 
						|
    $spacing_1 += $column_1_offset;
 | 
						|
    $spacing_3 += $column_1_offset;
 | 
						|
    my %Format = (
 | 
						|
        '1' => { 'xml' => 'name="%s" ',
 | 
						|
                 'txt' => "\%-${spacing_0}s ",
 | 
						|
               },
 | 
						|
        '2' => { 'xml' => 'name="%s" ',
 | 
						|
                 'txt' => "\%-${spacing_3}s ",
 | 
						|
               },
 | 
						|
        '3' => { 'xml' => 'files_count="%d" ',
 | 
						|
                 'txt' => '%6d ',
 | 
						|
               },
 | 
						|
        '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 | 
						|
                 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 | 
						|
               },
 | 
						|
        '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
 | 
						|
                 'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
 | 
						|
               },
 | 
						|
        '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 | 
						|
                 'txt' => ' x %6.2f = %14.2f',
 | 
						|
               },
 | 
						|
    );
 | 
						|
    my $Style = "txt";
 | 
						|
       $Style = "xml" if $opt_xml ;
 | 
						|
       $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 | 
						|
       $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 | 
						|
       $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 | 
						|
 | 
						|
    my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 | 
						|
       $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 | 
						|
            if (!$opt_sum_reports) and (!$opt_3) and (68 + $column_1_offset) > 79;
 | 
						|
    my $data_line  = "";
 | 
						|
    my $first_column;
 | 
						|
    my $BY_LANGUAGE = 0;
 | 
						|
    my $BY_FILE     = 0;
 | 
						|
    if      ($report_type eq "by language") {
 | 
						|
        $first_column = "Language";
 | 
						|
        $BY_LANGUAGE  = 1;
 | 
						|
    } elsif ($report_type eq "by file")     {
 | 
						|
        $first_column = "File";
 | 
						|
        $BY_FILE      = 1;
 | 
						|
    } elsif ($report_type eq "by report file")     {
 | 
						|
        $first_column = "File";
 | 
						|
    } else {
 | 
						|
        $first_column = "Report File";
 | 
						|
    }
 | 
						|
 | 
						|
    my $header_line  = sprintf "%s v %s", $URL, $version;
 | 
						|
       $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 | 
						|
                        $elapsed_sec           ,
 | 
						|
                        $sum_files/$elapsed_sec,
 | 
						|
                        $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 | 
						|
    if ($opt_xml or $opt_yaml or $opt_json) {
 | 
						|
        if (!$ALREADY_SHOWED_HEADER) {
 | 
						|
            if ($opt_by_file_by_lang and $opt_json) {
 | 
						|
                push @results, '{ "by_file" : ';
 | 
						|
            }
 | 
						|
            push @results, xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | 
						|
                                                   $sum_files, $sum_lines, $BY_FILE);
 | 
						|
#           $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 | 
						|
            # --sum-reports yields two xml or yaml files, one by
 | 
						|
            # language and one by report file, each of which needs a header
 | 
						|
        }
 | 
						|
        if ($opt_xml) {
 | 
						|
            if ($BY_FILE or ($report_type eq "by report file")) {
 | 
						|
                push @results, "<files>";
 | 
						|
            } else {
 | 
						|
                push @results, "<languages>";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        $header_line =~ s/,// if $opt_csv;
 | 
						|
        push @results, output_header($header_line, $hyphen_line, $BY_FILE);
 | 
						|
    }
 | 
						|
 | 
						|
    if ($Style eq "txt") {
 | 
						|
        # column headers
 | 
						|
        if (!$opt_3 and $BY_FILE) {
 | 
						|
            my $spacing_n = $spacing_1 - 11;
 | 
						|
            $data_line  = sprintf "%-${spacing_n}s ", $first_column;
 | 
						|
        } else {
 | 
						|
            $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 | 
						|
        }
 | 
						|
        if ($BY_FILE) {
 | 
						|
            $data_line .= sprintf "%${spacing_2}s "  , " "    ;
 | 
						|
        } else {
 | 
						|
            $data_line .= sprintf "%${spacing_2}s "  , "files";
 | 
						|
        }
 | 
						|
        my $PCT_symbol = "";
 | 
						|
           $PCT_symbol = " \%" if $opt_by_percent;
 | 
						|
        $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | 
						|
            "blank${PCT_symbol}"   ,
 | 
						|
            "comment${PCT_symbol}" ,
 | 
						|
            "code";
 | 
						|
        $data_line .= sprintf " %8s   %14s",
 | 
						|
            "scale"         ,
 | 
						|
            "3rd gen. equiv"
 | 
						|
              if $opt_3;
 | 
						|
        if ($opt_md) {
 | 
						|
            my @col_header  = ();
 | 
						|
            if ($data_line =~ m{\s%}) {
 | 
						|
                $data_line =~ s{\s%}{_%}g;
 | 
						|
                foreach my $w ( split(' ', $data_line) ) {
 | 
						|
                    $w =~ s{_%}{ %};
 | 
						|
                    push @col_header, $w;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                push @col_header, split(' ', $data_line);
 | 
						|
            }
 | 
						|
            my @col_hyphens    = ( '-------:') x scalar(@col_header);
 | 
						|
               $col_hyphens[0] =   ':-------'; # first column left justified
 | 
						|
            push @results, join("|", @col_header );
 | 
						|
            push @results, join("|", @col_hyphens);
 | 
						|
        } else {
 | 
						|
            push @results, $data_line;
 | 
						|
            push @results, $hyphen_line;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if ($opt_csv)  {
 | 
						|
        my $header2;
 | 
						|
        if ($BY_FILE) {
 | 
						|
            $header2 = "language${DELIM}filename";
 | 
						|
        } else {
 | 
						|
            $header2 = "files${DELIM}language";
 | 
						|
        }
 | 
						|
        $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
 | 
						|
        $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
 | 
						|
        $header2 .= ${DELIM} . '"' . $header_line . '"';
 | 
						|
        push @results, $header2;
 | 
						|
    }
 | 
						|
 | 
						|
    my $sum_scaled = 0;
 | 
						|
    foreach my $lang_or_file (sort {
 | 
						|
                                 $rhh_count->{$b}{'code'} <=>
 | 
						|
                                 $rhh_count->{$a}{'code'}
 | 
						|
                              or $a cmp $b
 | 
						|
                                        }
 | 
						|
                                   keys %{$rhh_count}) {
 | 
						|
        next if $lang_or_file eq "by report file";
 | 
						|
        my ($factor, $scaled);
 | 
						|
        if ($BY_LANGUAGE or $BY_FILE) {
 | 
						|
            $factor = 1;
 | 
						|
            if ($BY_LANGUAGE) {
 | 
						|
                if (defined $rh_scale->{$lang_or_file}) {
 | 
						|
                    $factor = $rh_scale->{$lang_or_file};
 | 
						|
                } else {
 | 
						|
                    warn "No scale factor for $lang_or_file; using 1.00";
 | 
						|
                }
 | 
						|
            } else { # by individual code file
 | 
						|
                if ($report_type ne "by report file") {
 | 
						|
                    next unless defined $rhh_count->{$lang_or_file}{'lang'};
 | 
						|
                    next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 | 
						|
                    $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 | 
						|
                }
 | 
						|
            }
 | 
						|
            $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
 | 
						|
        } else {
 | 
						|
            if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
 | 
						|
                $opt_3 = 0;
 | 
						|
                # If we're summing together files previously generated
 | 
						|
                # with --no3 then rhh_count->{$lang_or_file}{'scaled'}
 | 
						|
                # this variable will be undefined.  That should only
 | 
						|
                # happen when summing together by file however.
 | 
						|
            } elsif ($BY_LANGUAGE) {
 | 
						|
                warn "Missing scaled language info for $lang_or_file\n";
 | 
						|
            }
 | 
						|
            if ($opt_3) {
 | 
						|
                $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
 | 
						|
                $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        if ($BY_FILE) {
 | 
						|
            my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | 
						|
               $clean_filename = xml_metachars($clean_filename) if $opt_xml;
 | 
						|
            $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
 | 
						|
        } else {
 | 
						|
            $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
 | 
						|
        }
 | 
						|
        $data_line .= sprintf $Format{3}{$Style}  ,
 | 
						|
                        $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          my $DEN = compute_denominator($opt_by_percent       ,
 | 
						|
                        $rhh_count->{$lang_or_file}{'code'}   ,
 | 
						|
                        $rhh_count->{$lang_or_file}{'comment'},
 | 
						|
                        $rhh_count->{$lang_or_file}{'blank'}  );
 | 
						|
          $data_line .= sprintf $Format{5}{$Style}  ,
 | 
						|
              $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
 | 
						|
              $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
 | 
						|
              $rhh_count->{$lang_or_file}{'code'}   ;
 | 
						|
        } else {
 | 
						|
          $data_line .= sprintf $Format{4}{$Style}  ,
 | 
						|
              $rhh_count->{$lang_or_file}{'blank'}  ,
 | 
						|
              $rhh_count->{$lang_or_file}{'comment'},
 | 
						|
              $rhh_count->{$lang_or_file}{'code'}   ;
 | 
						|
        }
 | 
						|
        $data_line .= sprintf $Format{6}{$Style}  ,
 | 
						|
            $factor                               ,
 | 
						|
            $scaled if $opt_3;
 | 
						|
        $sum_scaled  += $scaled if $opt_3;
 | 
						|
 | 
						|
        if ($opt_xml) {
 | 
						|
            if (defined $rhh_count->{$lang_or_file}{'lang'}) {
 | 
						|
                my $lang = $rhh_count->{$lang_or_file}{'lang'};
 | 
						|
                if (!defined $languages->{$lang}) {
 | 
						|
                    $languages->{$lang} = $lang;
 | 
						|
                }
 | 
						|
                $data_line.=' language="' . $lang . '" ';
 | 
						|
            }
 | 
						|
            if ($BY_FILE or ($report_type eq "by report file")) {
 | 
						|
                push @results, "  <file " . $data_line . "/>";
 | 
						|
            } else {
 | 
						|
                push @results, "  <language " . $data_line . "/>";
 | 
						|
            }
 | 
						|
        } elsif ($opt_yaml or $opt_json) {
 | 
						|
            my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | 
						|
            if ($opt_yaml) {
 | 
						|
                # YAML: force quoted language or filename in case these
 | 
						|
                #       have embedded funny characters, issue #312
 | 
						|
                push @results,"'" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "' :$open_B";
 | 
						|
            } else {
 | 
						|
                push @results,"${Q}" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "${Q} :$open_B";
 | 
						|
            }
 | 
						|
            push @results,"  ${Q}nFiles${Q}: " . $rhh_count->{$lang_or_file}{'nFiles'} . $C
 | 
						|
                unless $BY_FILE;
 | 
						|
            if ($opt_by_percent) {
 | 
						|
              my $DEN = compute_denominator($opt_by_percent       ,
 | 
						|
                            $rhh_count->{$lang_or_file}{'code'}   ,
 | 
						|
                            $rhh_count->{$lang_or_file}{'comment'},
 | 
						|
                            $rhh_count->{$lang_or_file}{'blank'}  );
 | 
						|
              push @results,"  ${Q}blank_pct${Q}: "   .
 | 
						|
                sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . $C;
 | 
						|
              push @results,"  ${Q}comment_pct${Q}: " .
 | 
						|
                sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . $C;
 | 
						|
              push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}  . $C;
 | 
						|
            } else {
 | 
						|
              push @results,"  ${Q}blank${Q}: "   . $rhh_count->{$lang_or_file}{'blank'}   . $C;
 | 
						|
              push @results,"  ${Q}comment${Q}: " . $rhh_count->{$lang_or_file}{'comment'} . $C;
 | 
						|
              push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}    . $C;
 | 
						|
            }
 | 
						|
            push @results,"  ${Q}language${Q}: "  . $Q . $rhh_count->{$lang_or_file}{'lang'} . $Q . $C
 | 
						|
                if $BY_FILE;
 | 
						|
            if ($opt_3) {
 | 
						|
                push @results, "  ${Q}scaled${Q}: " . $scaled . $C;
 | 
						|
                push @results, "  ${Q}factor${Q}: " . $factor . $C;
 | 
						|
            }
 | 
						|
            if ($opt_json) { # replace the trailing comma with }, on the last line
 | 
						|
                $results[-1] =~ s/,\s*$/},/;
 | 
						|
            }
 | 
						|
        } elsif ($opt_csv or $opt_md) {
 | 
						|
            my $extra_3 = "";
 | 
						|
               $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
 | 
						|
            my $first_column = undef;
 | 
						|
            my $clean_name   = $lang_or_file;
 | 
						|
            my $str;
 | 
						|
            if ($opt_csv) {
 | 
						|
                if ($BY_FILE) {
 | 
						|
                    $first_column = $rhh_count->{$lang_or_file}{'lang'};
 | 
						|
                    $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | 
						|
                } else {
 | 
						|
                    $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 | 
						|
                }
 | 
						|
                $str = $first_column   . ${DELIM} .
 | 
						|
                       $clean_name     . ${DELIM};
 | 
						|
            } else {
 | 
						|
                if ($BY_FILE) {
 | 
						|
                    $first_column = $rhh_count->{$lang_or_file}{'lang'};
 | 
						|
                    $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | 
						|
                    $str = $clean_name . ${DELIM};
 | 
						|
                } else {
 | 
						|
                    $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 | 
						|
                    $str = $clean_name     . ${DELIM} .
 | 
						|
                           $first_column   . ${DELIM};
 | 
						|
                }
 | 
						|
            }
 | 
						|
            if ($opt_by_percent) {
 | 
						|
              my $DEN = compute_denominator($opt_by_percent               ,
 | 
						|
                            $rhh_count->{$lang_or_file}{'code'}   ,
 | 
						|
                            $rhh_count->{$lang_or_file}{'comment'},
 | 
						|
                            $rhh_count->{$lang_or_file}{'blank'}  );
 | 
						|
              $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
 | 
						|
                      sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
 | 
						|
                      $rhh_count->{$lang_or_file}{'code'};
 | 
						|
            } else {
 | 
						|
              $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
 | 
						|
                      $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
 | 
						|
                      $rhh_count->{$lang_or_file}{'code'};
 | 
						|
            }
 | 
						|
            $str .= $extra_3;
 | 
						|
            push @results, $str;
 | 
						|
 | 
						|
        } else {
 | 
						|
            push @results, $data_line;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my $avg_scale = 1;  # weighted average of scale factors
 | 
						|
       $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
 | 
						|
            if $sum_code and $opt_3;
 | 
						|
 | 
						|
    if ($opt_xml) {
 | 
						|
        $data_line = "";
 | 
						|
        if (!$BY_FILE) {
 | 
						|
            $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
 | 
						|
        }
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          my $DEN = compute_denominator($opt_by_percent    ,
 | 
						|
                        $sum_code, $sum_comment, $sum_blank);
 | 
						|
          $data_line .= sprintf $Format{'5'}{$Style},
 | 
						|
              $sum_blank   / $DEN * 100,
 | 
						|
              $sum_comment / $DEN * 100,
 | 
						|
              $sum_code    ;
 | 
						|
        } else {
 | 
						|
          $data_line .= sprintf $Format{'4'}{$Style},
 | 
						|
              $sum_blank   ,
 | 
						|
              $sum_comment ,
 | 
						|
              $sum_code    ;
 | 
						|
        }
 | 
						|
        $data_line .= sprintf $Format{'6'}{$Style},
 | 
						|
            $avg_scale   ,
 | 
						|
            $sum_scaled  if $opt_3;
 | 
						|
        push @results, "  <total " . $data_line . "/>";
 | 
						|
 | 
						|
        if ($BY_FILE or ($report_type eq "by report file")) {
 | 
						|
            push @results, "</files>";
 | 
						|
        } else {
 | 
						|
            foreach my $language (keys %{$languages}) {
 | 
						|
                push @results, '  <language name="' . $language . '"/>';
 | 
						|
            }
 | 
						|
            push @results, "</languages>";
 | 
						|
        }
 | 
						|
 | 
						|
        if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
 | 
						|
            push @results, "</results>";
 | 
						|
        } else {
 | 
						|
            $ALREADY_SHOWED_XML_SECTION = 1;
 | 
						|
        }
 | 
						|
    } elsif ($opt_yaml or $opt_json) {
 | 
						|
        my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | 
						|
        push @results, "${Q}SUM${Q}: ${open_B}";
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          my $DEN = compute_denominator($opt_by_percent    ,
 | 
						|
                        $sum_code, $sum_comment, $sum_blank);
 | 
						|
          push @results, "  ${Q}blank${Q}: "  . sprintf("%.2f", $sum_blank   / $DEN * 100) . $C;
 | 
						|
          push @results, "  ${Q}comment${Q}: ". sprintf("%.2f", $sum_comment / $DEN * 100) . $C;
 | 
						|
          push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 | 
						|
        } else {
 | 
						|
          push @results, "  ${Q}blank${Q}: "  . $sum_blank   . $C;
 | 
						|
          push @results, "  ${Q}comment${Q}: ". $sum_comment . $C;
 | 
						|
          push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 | 
						|
        }
 | 
						|
        push @results, "  ${Q}nFiles${Q}: " . $sum_files   . $C;
 | 
						|
        if ($opt_3) {
 | 
						|
            push @results, "  ${Q}scaled${Q}: " . $sum_scaled . $C;
 | 
						|
            push @results, "  ${Q}factor${Q}: " . $avg_scale  . $C;
 | 
						|
        }
 | 
						|
        if ($opt_json) {
 | 
						|
            $results[-1] =~ s/,\s*$/} }/;
 | 
						|
            if ($opt_by_file_by_lang) {
 | 
						|
                if ($ALREADY_SHOWED_HEADER) {
 | 
						|
                    $results[-1] .= ' }';
 | 
						|
                } else {
 | 
						|
                    $results[-1] .= ', "by_lang" : {';
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } elsif ($opt_csv) {
 | 
						|
        my @entries = ();
 | 
						|
        if ($opt_by_file) {
 | 
						|
            push @entries, "SUM";
 | 
						|
            push @entries, "";
 | 
						|
        } else {
 | 
						|
            push @entries, $sum_files;
 | 
						|
            push @entries, "SUM";
 | 
						|
        }
 | 
						|
        if ($opt_by_percent) {
 | 
						|
            my $DEN = compute_denominator($opt_by_percent    ,
 | 
						|
                          $sum_code, $sum_comment, $sum_blank);
 | 
						|
            push @entries, sprintf("%.2f", $sum_blank   / $DEN * 100);
 | 
						|
            push @entries, sprintf("%.2f", $sum_comment / $DEN * 100);
 | 
						|
        } else {
 | 
						|
            push @entries, $sum_blank;
 | 
						|
            push @entries, $sum_comment;
 | 
						|
        }
 | 
						|
        push @entries, $sum_code;
 | 
						|
        if ($opt_3) {
 | 
						|
            push @entries, $sum_scaled;
 | 
						|
            push @entries, $avg_scale ;
 | 
						|
        }
 | 
						|
        my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | 
						|
        push @results, join($separator, @entries);
 | 
						|
    } else {
 | 
						|
 | 
						|
        if ($BY_FILE) {
 | 
						|
            $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
 | 
						|
        } else {
 | 
						|
            $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
 | 
						|
            $data_line .= sprintf "%${spacing_2}d ", $sum_files;
 | 
						|
        }
 | 
						|
        if ($opt_by_percent) {
 | 
						|
          my $DEN = compute_denominator($opt_by_percent    ,
 | 
						|
                        $sum_code, $sum_comment, $sum_blank);
 | 
						|
          $data_line .= sprintf $Format{'5'}{$Style},
 | 
						|
              $sum_blank   / $DEN * 100,
 | 
						|
              $sum_comment / $DEN * 100,
 | 
						|
              $sum_code    ;
 | 
						|
        } else {
 | 
						|
          $data_line .= sprintf $Format{'4'}{$Style},
 | 
						|
              $sum_blank   ,
 | 
						|
              $sum_comment ,
 | 
						|
              $sum_code    ;
 | 
						|
        }
 | 
						|
        $data_line .= sprintf $Format{'6'}{$Style},
 | 
						|
            $avg_scale   ,
 | 
						|
            $sum_scaled if $opt_3;
 | 
						|
        if ($opt_md) {
 | 
						|
            my @words = split(' ', $data_line);
 | 
						|
            my $n_cols = scalar(@words);
 | 
						|
#           my $n_cols = scalar(split(' ', $data_line));  # deprecated
 | 
						|
            $data_line =~ s/\s+/\|/g;
 | 
						|
            my @col_hyphens    = ( '--------') x $n_cols;
 | 
						|
            push @results, join("|", @col_hyphens);
 | 
						|
            push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
 | 
						|
            unshift @results, ( "cloc|$header_line", "--- | ---", "", );
 | 
						|
        } else {
 | 
						|
            push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
 | 
						|
            push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
 | 
						|
            push @results, $hyphen_line;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | 
						|
    $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 | 
						|
    print "<- generate_report\n" if $opt_v > 2;
 | 
						|
    return @results;
 | 
						|
} # 1}}}
 | 
						|
sub print_errors {                           # {{{1
 | 
						|
    my ($rh_Error_Codes, # in
 | 
						|
        $raa_errors    , # in
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> print_errors\n" if $opt_v > 2;
 | 
						|
    my %error_string = reverse(%{$rh_Error_Codes});
 | 
						|
    my $nErrors      = scalar @{$raa_errors};
 | 
						|
    warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors);
 | 
						|
    for (my $i = 0; $i < $nErrors; $i++) {
 | 
						|
        warn sprintf "%s:  %s\n",
 | 
						|
                     $error_string{ $raa_errors->[$i][0] },
 | 
						|
                     $raa_errors->[$i][1] ;
 | 
						|
    }
 | 
						|
    print "<- print_errors\n" if $opt_v > 2;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub write_lang_def {                         # {{{1
 | 
						|
    my ($file                     ,
 | 
						|
        $rh_Language_by_Extension , # in
 | 
						|
        $rh_Language_by_Script    , # in
 | 
						|
        $rh_Language_by_File      , # in
 | 
						|
        $rhaa_Filters_by_Language , # in
 | 
						|
        $rh_Not_Code_Extension    , # in
 | 
						|
        $rh_Not_Code_Filename     , # in
 | 
						|
        $rh_Scale_Factor          , # in
 | 
						|
        $rh_EOL_Continuation_re   , # in
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> write_lang_def($file)\n" if $opt_v > 2;
 | 
						|
    my @outlines = ();
 | 
						|
 | 
						|
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
 | 
						|
        next if $language =~ /(Brain|\(unknown\))/;
 | 
						|
        next if defined $Extension_Collision{$language};
 | 
						|
        push @outlines, $language;
 | 
						|
        foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
 | 
						|
            my $line = "";
 | 
						|
            $line .= sprintf "    filter %s", $filter->[0];
 | 
						|
            $line .= sprintf " %s", $filter->[1] if defined $filter->[1];
 | 
						|
            # $filter->[0] == 'remove_between_general',
 | 
						|
            #                 'remove_between_regex', and
 | 
						|
            #                 'remove_matches_2re' have two args
 | 
						|
            $line .= sprintf " %s", $filter->[2] if defined $filter->[2];
 | 
						|
            # $filter->[0] == 'replace_between_regex' has three or four args
 | 
						|
            $line .= sprintf " %s", $filter->[3] if defined $filter->[3];
 | 
						|
            $line .= sprintf " %s", $filter->[4] if defined $filter->[4];
 | 
						|
            push @outlines, $line;
 | 
						|
        }
 | 
						|
 | 
						|
        # file extension won't appear if the extension maps to
 | 
						|
        # multiple languages; work around this
 | 
						|
        my $found = 0;
 | 
						|
        foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
 | 
						|
            if ($language eq $rh_Language_by_Extension->{$ext}) {
 | 
						|
                push @outlines, sprintf "    extension %s\n", $ext;
 | 
						|
                $found = 1;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (!$found and $opt_write_lang_def_incl_dup) {
 | 
						|
            foreach my $multilang (sort keys %Extension_Collision) {
 | 
						|
                my %Languages = map { $_ => 1 } split('/', $multilang);
 | 
						|
                next unless $Languages{$language};
 | 
						|
                foreach my $ext (@{$Extension_Collision{$multilang}}) {
 | 
						|
                    push @outlines, sprintf "    extension %s\n", $ext;
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        foreach my $filename (sort keys %{$rh_Language_by_File}) {
 | 
						|
            if ($language eq $rh_Language_by_File->{$filename}) {
 | 
						|
                push @outlines, sprintf "    filename %s\n", $filename;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
 | 
						|
            if ($language eq $rh_Language_by_Script->{$script_exe}) {
 | 
						|
                push @outlines, sprintf "    script_exe %s\n", $script_exe;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        push @outlines, sprintf "    3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
 | 
						|
        if (defined $rh_EOL_Continuation_re->{$language}) {
 | 
						|
            push @outlines, sprintf "    end_of_line_continuation %s\n",
 | 
						|
                $rh_EOL_Continuation_re->{$language};
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    write_file($file, {}, @outlines);
 | 
						|
    print "<- write_lang_def\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub read_lang_def {                          # {{{1
 | 
						|
    my ($file                     ,
 | 
						|
        $rh_Language_by_Extension , # out
 | 
						|
        $rh_Language_by_Script    , # out
 | 
						|
        $rh_Language_by_File      , # out
 | 
						|
        $rhaa_Filters_by_Language , # out
 | 
						|
        $rh_Not_Code_Extension    , # out
 | 
						|
        $rh_Not_Code_Filename     , # out
 | 
						|
        $rh_Scale_Factor          , # out
 | 
						|
        $rh_EOL_Continuation_re   , # out
 | 
						|
        $rh_EOL_abc,
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
 | 
						|
    print "-> read_lang_def($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $language = "";
 | 
						|
    my @lines = read_file($file);
 | 
						|
    foreach (@lines) {
 | 
						|
        next if /^\s*#/ or /^\s*$/;
 | 
						|
 | 
						|
        $_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
 | 
						|
 | 
						|
        if (/^(\w+.*?)\s*$/) {
 | 
						|
            $language = $1;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        die "Missing computer language name, line $. of $file\n"
 | 
						|
            unless $language;
 | 
						|
 | 
						|
        if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
 | 
						|
                       \s+(\S+)\s+(\S+)\s*$/x) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $3 , $4 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_between_regex)
 | 
						|
                   \s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 , $4 , $5]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_between_regex)
 | 
						|
                       \s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 , $4 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_regex)\s+(\S+)\s*$/) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , '' ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_regex)
 | 
						|
                       \s+(\S+)\s+(.+?)\s*$/x) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
 | 
						|
            if (defined $rh_Language_by_Extension->{$1}) {
 | 
						|
                die "File extension collision:  $1 ",
 | 
						|
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
 | 
						|
                    "and '$language'\n" ,
 | 
						|
                    "Edit $file and remove $1 from one of these two ",
 | 
						|
                    "language definitions.\n";
 | 
						|
            }
 | 
						|
            $rh_Language_by_Extension->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
 | 
						|
            $rh_Language_by_File->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
 | 
						|
            $rh_Language_by_Script->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
 | 
						|
            $rh_Scale_Factor->{$language} = $1;
 | 
						|
 | 
						|
        } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
 | 
						|
            $rh_EOL_Continuation_re->{$language} = $1;
 | 
						|
 | 
						|
        } else {
 | 
						|
            die "Unexpected data line $. of $file:\n$_\n";
 | 
						|
        }
 | 
						|
 | 
						|
    }
 | 
						|
    print "<- read_lang_def\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub merge_lang_def {                         # {{{1
 | 
						|
    my ($file                     ,
 | 
						|
        $rh_Language_by_Extension , # in/out
 | 
						|
        $rh_Language_by_Script    , # in/out
 | 
						|
        $rh_Language_by_File      , # in/out
 | 
						|
        $rhaa_Filters_by_Language , # in/out
 | 
						|
        $rh_Not_Code_Extension    , # in/out
 | 
						|
        $rh_Not_Code_Filename     , # in/out
 | 
						|
        $rh_Scale_Factor          , # in/out
 | 
						|
        $rh_EOL_Continuation_re   , # in/out
 | 
						|
        $rh_EOL_abc,
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
 | 
						|
    print "-> merge_lang_def($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $language        = "";
 | 
						|
    my $already_know_it = undef;
 | 
						|
    my @lines = read_file($file);
 | 
						|
    foreach (@lines) {
 | 
						|
        next if /^\s*#/ or /^\s*$/;
 | 
						|
 | 
						|
        $_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
 | 
						|
 | 
						|
        if (/^(\w+.*?)\s*$/) {
 | 
						|
            $language = $1;
 | 
						|
            $already_know_it = defined $rh_Scale_Factor->{$language};
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        die "Missing computer language name, line $. of $file\n"
 | 
						|
            unless $language;
 | 
						|
 | 
						|
        if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
 | 
						|
                       \s+(\S+)\s+(\S+)\s*$/x) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $3 , $4 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_between_regex)
 | 
						|
                   \s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 , $4 , $5]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_between_regex)
 | 
						|
                       \s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 , $4 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(replace_regex)
 | 
						|
                       \s+(\S+)\s+(.+?)\s*$/x) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [
 | 
						|
                  $1 , $2 , $3 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
 | 
						|
 | 
						|
        } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            if (defined $rh_Language_by_Extension->{$1}) {
 | 
						|
                die "File extension collision:  $1 ",
 | 
						|
                    "maps to languages '$rh_Language_by_Extension->{$1}' ",
 | 
						|
                    "and '$language'\n" ,
 | 
						|
                    "Edit $file and remove $1 from one of these two ",
 | 
						|
                    "language definitions.\n";
 | 
						|
            }
 | 
						|
            $rh_Language_by_Extension->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            $rh_Language_by_File->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            $rh_Language_by_Script->{$1} = $language;
 | 
						|
 | 
						|
        } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            $rh_Scale_Factor->{$language} = $1;
 | 
						|
 | 
						|
        } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
 | 
						|
            next if $already_know_it;
 | 
						|
            $rh_EOL_Continuation_re->{$language} = $1;
 | 
						|
 | 
						|
        } else {
 | 
						|
            die "Unexpected data line $. of $file:\n$_\n";
 | 
						|
        }
 | 
						|
 | 
						|
    }
 | 
						|
    print "<- merge_lang_def\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub print_extension_info {                   # {{{1
 | 
						|
    my ($extension,) = @_;
 | 
						|
    if ($extension) {  # show information on this extension
 | 
						|
        foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | 
						|
            # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
            next if $Language_by_Extension{$ext} =~ /Brain/;
 | 
						|
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
 | 
						|
                if $ext =~ m{$extension}i;
 | 
						|
        }
 | 
						|
    } else {           # show information on all  extensions
 | 
						|
        foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | 
						|
            next if $Language_by_Extension{$ext} =~ /Brain/;
 | 
						|
            # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
            printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
 | 
						|
        }
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub print_language_info {                    # {{{1
 | 
						|
    my ($language,
 | 
						|
        $prefix ,) = @_;
 | 
						|
    my %extensions = (); # the subset matched by the given $language value
 | 
						|
    if ($language) {  # show information on this language
 | 
						|
        foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | 
						|
            # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
 | 
						|
                if lc $Language_by_Extension{$ext} eq lc $language;
 | 
						|
#               if $Language_by_Extension{$ext} =~ m{$language}i;
 | 
						|
        }
 | 
						|
    } else {          # show information on all  languages
 | 
						|
        foreach my $ext (sort {lc $a cmp lc $b  or $a cmp $b } keys %Language_by_Extension) {
 | 
						|
            # Language_by_Extension{f}    = 'Fortran 77'
 | 
						|
            push @{$extensions{$Language_by_Extension{$ext}} }, $ext
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # add exceptions (one file extension mapping to multiple languages)
 | 
						|
    if (!$language or $language =~ /^(Objective-C|MATLAB|Mathematica|MUMPS|Mercury)$/i) {
 | 
						|
        push @{$extensions{'Objective-C'}}, "m";
 | 
						|
        push @{$extensions{'MATLAB'}}     , "m";
 | 
						|
        push @{$extensions{'Mathematica'}}, "m";
 | 
						|
        push @{$extensions{'MUMPS'}}      , "m";
 | 
						|
        delete $extensions{'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Lisp|OpenCL)$/i) {
 | 
						|
        push @{$extensions{'Lisp'}}  , "cl";
 | 
						|
        push @{$extensions{'OpenCL'}}, "cl";
 | 
						|
        delete $extensions{'Lisp/OpenCL'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Lisp|Julia)$/i) {
 | 
						|
        push @{$extensions{'Lisp'}}  , "jl";
 | 
						|
        push @{$extensions{'Julia'}} , "jl";
 | 
						|
        delete $extensions{'Lisp/Julia'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Perl|Prolog)$/i) {
 | 
						|
        push @{$extensions{'Perl'}}  , "pl";
 | 
						|
        push @{$extensions{'Prolog'}}, "pl";
 | 
						|
        delete $extensions{'Perl/Prolog'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Raku|Prolog)$/i) {
 | 
						|
        push @{$extensions{'Perl'}}  , "p6";
 | 
						|
        push @{$extensions{'Prolog'}}, "p6";
 | 
						|
        delete $extensions{'Perl/Prolog'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(IDL|Qt Project|Prolog|ProGuard)$/i) {
 | 
						|
        push @{$extensions{'IDL'}}       , "pro";
 | 
						|
        push @{$extensions{'Qt Project'}}, "pro";
 | 
						|
        push @{$extensions{'Prolog'}}    , "pro";
 | 
						|
        push @{$extensions{'ProGuard'}}  , "pro";
 | 
						|
        delete $extensions{'IDL/Qt Project/Prolog/ProGuard'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(D|dtrace)$/i) {
 | 
						|
        push @{$extensions{'D'}}       , "d";
 | 
						|
        push @{$extensions{'dtrace'}}  , "d";
 | 
						|
        delete $extensions{'D/dtrace'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^Forth$/) {
 | 
						|
        push @{$extensions{'Forth'}}     , "fs";
 | 
						|
        push @{$extensions{'Forth'}}     , "f";
 | 
						|
        push @{$extensions{'Forth'}}     , "for";
 | 
						|
        delete $extensions{'Fortran 77/Forth'};
 | 
						|
        delete $extensions{'F#/Forth'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^Fortran 77$/) {
 | 
						|
        push @{$extensions{'Fortran 77'}}, "f";
 | 
						|
        push @{$extensions{'Fortran 77'}}, "for";
 | 
						|
        push @{$extensions{'F#'}}        , "fs";
 | 
						|
        delete $extensions{'Fortran 77/Forth'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^F#$/) {
 | 
						|
        push @{$extensions{'F#'}}        , "fs";
 | 
						|
        delete $extensions{'F#/Forth'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Verilog-SystemVerilog|Coq)$/) {
 | 
						|
        push @{$extensions{'Coq'}}                   , "v";
 | 
						|
        push @{$extensions{'Verilog-SystemVerilog'}} , "v";
 | 
						|
        delete $extensions{'Verilog-SystemVerilog/Coq'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(TypeScript|Qt Linguist)$/) {
 | 
						|
        push @{$extensions{'TypeScript'}}  , "ts";
 | 
						|
        push @{$extensions{'Qt Linguist'}} , "ts";
 | 
						|
        delete $extensions{'TypeScript/Qt Linguist'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Qt|Glade)$/) {
 | 
						|
        push @{$extensions{'Glade'}} , "ui";
 | 
						|
        push @{$extensions{'Qt'}}    , "ui";
 | 
						|
        delete $extensions{'Qt/Glade'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(C#|Smalltalk)$/) {
 | 
						|
        push @{$extensions{'C#'}}           , "cs";
 | 
						|
        push @{$extensions{'Smalltalk'}}    , "cs";
 | 
						|
        delete $extensions{'C#/Smalltalk'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Visual\s+Basic|TeX|Apex\s+Class)$/i) {
 | 
						|
        push @{$extensions{'Visual Basic'}} , "cls";
 | 
						|
        push @{$extensions{'TeX'}}          , "cls";
 | 
						|
        push @{$extensions{'Apex Class'}}   , "cls";
 | 
						|
        delete $extensions{'Visual Basic/TeX/Apex Class'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Ant)$/i) {
 | 
						|
        push @{$extensions{'Ant'}}  , "build.xml";
 | 
						|
        delete $extensions{'Ant/XML'};
 | 
						|
    }
 | 
						|
    if (!$language or $language =~ /^(Scheme|SaltStack)$/i) {
 | 
						|
        push @{$extensions{'Scheme'}}    , "sls";
 | 
						|
        push @{$extensions{'SaltStack'}} , "sls";
 | 
						|
        delete $extensions{'Scheme/SaltStack'};
 | 
						|
    }
 | 
						|
    if ($opt_explain) {
 | 
						|
        return unless $extensions{$language};
 | 
						|
        if ($prefix) {
 | 
						|
            printf "%s %s\n", $prefix, join(", ", @{$extensions{$language}});
 | 
						|
        } else {
 | 
						|
            printf "%-26s (%s)\n", $language, join(", ", @{$extensions{$language}});
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        if (%extensions) {
 | 
						|
            foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
 | 
						|
                next if $lang =~ /Brain/;
 | 
						|
                if ($prefix) {
 | 
						|
                    printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
 | 
						|
                } else {
 | 
						|
                    printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub print_language_filters {                 # {{{1
 | 
						|
    my ($language,) = @_;
 | 
						|
    if (!$Filters_by_Language{$language} or
 | 
						|
        !@{$Filters_by_Language{$language}}) {
 | 
						|
        warn "Unknown language: $language\n";
 | 
						|
        warn "Use --show-lang to list all defined languages.\n";
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    printf "%s\n", $language;
 | 
						|
    foreach my $filter (@{$Filters_by_Language{$language}}) {
 | 
						|
        printf "    filter %s", $filter->[0];
 | 
						|
        printf "  %s", $filter->[1] if defined $filter->[1];
 | 
						|
        printf "  %s", $filter->[2] if defined $filter->[2];
 | 
						|
        printf "  %s", $filter->[3] if defined $filter->[3];
 | 
						|
        printf "  %s", $filter->[4] if defined $filter->[4];
 | 
						|
        print  "\n";
 | 
						|
    }
 | 
						|
    print_language_info($language, "    extensions:");
 | 
						|
} # 1}}}
 | 
						|
sub top_level_SMB_dir {                      # {{{1
 | 
						|
    # Ref https://github.com/AlDanial/cloc/issues/392, if the
 | 
						|
    # user supplies a directory name which is an SMB mount
 | 
						|
    # point, this directory will appear to File::Find as
 | 
						|
    # though it is empty unless $File::Find::dont_use_nlink
 | 
						|
    # is set to 1.  This subroutine checks to see if any SMB
 | 
						|
    # mounts (identified from stat()'s fourth entry, nlink,
 | 
						|
    # having a value of 2) were passed in on the command line.
 | 
						|
 | 
						|
    my ($ra_arg_list,) = @_;  # in user supplied file name, directory name, git hash, etc
 | 
						|
    foreach my $entry (@{$ra_arg_list}) {
 | 
						|
        next unless is_dir($entry);
 | 
						|
        # gets here if $entry is a directory; now get its nlink value
 | 
						|
        my $nlink;
 | 
						|
        if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
            my $stats = statL($entry);
 | 
						|
            $nlink = $stats->{nlink} if defined $stats;
 | 
						|
        } else {
 | 
						|
            my @stats = stat($entry);
 | 
						|
            $nlink = $stats[3];
 | 
						|
        }
 | 
						|
        return 1 if $nlink == 2;  # meaning it is an SMB mount
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub get_git_metadata {                       # {{{1
 | 
						|
    my ($ra_arg_list,             # in  file name, directory name and/or
 | 
						|
                                  #     git commit hash to examine
 | 
						|
        $rh_git_metadata) = @_;   # out repo info
 | 
						|
    # Capture git information where possible--origin, branch, commit hash.
 | 
						|
    my $prt_args = join(",", @{$ra_arg_list});
 | 
						|
    print "-> get_git_metadata($prt_args)\n" if $opt_v > 2;
 | 
						|
    foreach my $arg (@{$ra_arg_list}) {
 | 
						|
        next if is_file($arg);
 | 
						|
        my $origin = `git remote get-url origin 2>&1`;
 | 
						|
        next if $origin =~ /^fatal:/;
 | 
						|
        chomp($rh_git_metadata->{$arg}{"origin"} = $origin);
 | 
						|
        chomp($rh_git_metadata->{$arg}{"branch"} = `git symbolic-ref --short HEAD`);
 | 
						|
        if (is_dir($arg)) {
 | 
						|
            chomp($rh_git_metadata->{$arg}{"commit"}   = `git rev-parse HEAD`);
 | 
						|
        } else {
 | 
						|
            chomp($rh_git_metadata->{$arg}{"commit"}   = `git rev-parse $arg`);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- get_git_metadata()\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub replace_git_hash_with_tarfile {          # {{{1
 | 
						|
    my ($ra_arg_list,             # in  file name, directory name and/or
 | 
						|
                                  #     git commit hash to examine
 | 
						|
        $ra_git_similarity) = @_; # out only if --opt-git-diff-simindex
 | 
						|
    # replace git hashes in $ra_arg_list with tar files
 | 
						|
    # Diff mode and count mode behave differently:
 | 
						|
    #   Diff:
 | 
						|
    #       file  git_hash
 | 
						|
    #          Extract file from the git repo and only compare to it.
 | 
						|
    #       git_hash1  git_hash2
 | 
						|
    #          Get listings of all files in git_hash1 and git_hash2.
 | 
						|
    #            git ls-tree --name-only -r *git_hash1*
 | 
						|
    #            git ls-tree --name-only -r *git_hash2*
 | 
						|
    #          Next, get listings of files that changed with git_hash1
 | 
						|
    #          and git_hash2.
 | 
						|
    #            git diff-tree -r --no-commit-id --name-only *git_hash1* *git_hash2*
 | 
						|
    #          Finally, make two tar files of git repos1 and 2 where the file
 | 
						|
    #          listing is the union of changes.
 | 
						|
    #            git archive -o tarfile1 *git_hash1* \
 | 
						|
    #               <union of files that changed and exist in this commit>
 | 
						|
    #            git archive -o tarfile2 *git_hash2* \
 | 
						|
    #               <union of files that changed and exist in this commit>
 | 
						|
    #          To avoid "Argument list too long" error, repeat the git
 | 
						|
    #          archive step with chunks of 30,000 files at a time then
 | 
						|
    #          merge the tar files as the final step.
 | 
						|
    #   Regular count:
 | 
						|
    #       Simply make a tar file of all files in the git repo.
 | 
						|
 | 
						|
    my $prt_args = join(",", @{$ra_arg_list});
 | 
						|
    print "-> replace_git_hash_with_tarfile($prt_args)\n" if $opt_v > 2;
 | 
						|
#print "ra_arg_list 1: @{$ra_arg_list}\n";
 | 
						|
 | 
						|
    my $hash_regex = qr/^([a-f\d]{5,40}|master|HEAD)(~\d+)?$/;
 | 
						|
    my %replacement_arg_list = ();
 | 
						|
 | 
						|
    # early exit if none of the inputs look like git hashes
 | 
						|
    my %git_hash = ();
 | 
						|
    my $i = 0;
 | 
						|
    foreach my $file_or_dir (@{$ra_arg_list}) {
 | 
						|
        ++$i;
 | 
						|
        if (can_read($file_or_dir)) { # readable file or dir; not a git hash
 | 
						|
            $replacement_arg_list{$i} = $file_or_dir;
 | 
						|
            next;
 | 
						|
        } elsif ($opt_force_git or $file_or_dir =~ m/$hash_regex/) {
 | 
						|
            $git_hash{$file_or_dir} = $i;
 | 
						|
        } # else the input can't be understood; ignore for now
 | 
						|
    }
 | 
						|
    return unless %git_hash;
 | 
						|
 | 
						|
#   my $have_tar_git = external_utility_exists($ON_WINDOWS ? "unzip" : "tar --version") &&
 | 
						|
    my $have_tar_git = external_utility_exists("tar --version") &&
 | 
						|
                       external_utility_exists("git --version");
 | 
						|
    if (!$have_tar_git) {
 | 
						|
        warn "One or more inputs looks like a git hash but " .
 | 
						|
             "either git or tar is unavailable.\n";
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    my %repo_listing = ();  # $repo_listing{hash}{files} = 1;
 | 
						|
    foreach my $hash (sort keys %git_hash) {
 | 
						|
        my $git_list_cmd = "git ls-tree --name-only -r ";
 | 
						|
        if ($hash =~ m/(.*?):(.*?)$/) {
 | 
						|
            # requesting specific file(s) from this hash; grep for them
 | 
						|
            # Note:  this capability not fully implemented yet
 | 
						|
            $git_list_cmd .= "$1|grep '$2'";
 | 
						|
        } else {
 | 
						|
            $git_list_cmd .= $hash;
 | 
						|
        }
 | 
						|
        print "$git_list_cmd\n" if $opt_v;
 | 
						|
        foreach my $file (`$git_list_cmd`) {
 | 
						|
            $file =~ s/\s+$//;
 | 
						|
            $repo_listing{$hash}{$file} = 1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # logic for each of the modes
 | 
						|
    if ($opt_diff) {
 | 
						|
#print "A DIFF\n";
 | 
						|
        # set the default git diff algorithm
 | 
						|
        $opt_git_diff_rel = 1 unless $opt_git_diff_all or
 | 
						|
                                     $opt_git_diff_simindex;
 | 
						|
        # is it git to git, or git to file/dir ?
 | 
						|
        my ($Left, $Right) = @{$ra_arg_list};
 | 
						|
 | 
						|
#use Data::Dumper;
 | 
						|
#print "diff_listing= "; print Dumper(\%diff_listing);
 | 
						|
#print "git_hash= "; print Dumper(\%git_hash);
 | 
						|
        if ($git_hash{$Left} and $git_hash{$Right}) {
 | 
						|
#print "A DIFF git-to-git\n";
 | 
						|
            # git to git
 | 
						|
            # first make a union of all files that have changed in both commits
 | 
						|
            my %files_union = ();
 | 
						|
 | 
						|
            my @left_files  = ();
 | 
						|
            my @right_files = ();
 | 
						|
            if ($opt_git_diff_rel) {
 | 
						|
                # Strategy 1:  Union files are what git consinders have changed
 | 
						|
                #              between the two commits.
 | 
						|
                my $git_list_cmd = "git diff-tree -r --no-commit-id --name-only $Left $Right";
 | 
						|
                # print "$git_list_cmd\n" if $opt_v;
 | 
						|
                foreach my $file (`$git_list_cmd`) {
 | 
						|
                    chomp($file);
 | 
						|
                    $files_union{$file} = 1;
 | 
						|
                }
 | 
						|
            } elsif ($opt_git_diff_all) {
 | 
						|
                # Strategy 2:  Union files all files in both repos.
 | 
						|
                foreach my $file (keys %{$repo_listing{$Left }},
 | 
						|
                                  keys %{$repo_listing{$Right}}) {
 | 
						|
                   $files_union{$file} = 1;
 | 
						|
                }
 | 
						|
            } elsif ($opt_git_diff_simindex) {
 | 
						|
                # Strategy 3:  Use git's own similarity index to figure
 | 
						|
                #              out which files to compare.
 | 
						|
                git_similarity_index($Left              , # in
 | 
						|
                                     $Right             , # in
 | 
						|
                                    \@left_files        , # out
 | 
						|
                                    \@right_files       , # out
 | 
						|
                                     $ra_git_similarity); # out
 | 
						|
 | 
						|
            }
 | 
						|
 | 
						|
            if ($opt_exclude_list_file) {
 | 
						|
                my @reject_list = read_list_file($opt_exclude_list_file);
 | 
						|
                my %entries_to_drop = ();
 | 
						|
                foreach my $f_or_d (@reject_list) {
 | 
						|
                    if (-d $f_or_d) {
 | 
						|
                        # directory match
 | 
						|
                        $entries_to_drop{$f_or_d} = 1;
 | 
						|
                        foreach my $file (keys %files_union) {
 | 
						|
                            $entries_to_drop{$file} = 1 if $file =~ m{$f_or_d/};
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        # exact match
 | 
						|
                        $entries_to_drop{$f_or_d} = 1;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                foreach my $file (keys %entries_to_drop) {
 | 
						|
                    delete $files_union{$file};
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            if ($opt_list_file) {
 | 
						|
                my @include_list = read_list_file($opt_list_file);
 | 
						|
                my %entries_to_add  = ();
 | 
						|
                foreach my $f_or_d (@include_list) {
 | 
						|
                    if (-d $f_or_d) {
 | 
						|
                        # directory match
 | 
						|
                        $entries_to_add{$f_or_d} = 1;
 | 
						|
                        foreach my $file (keys %files_union) {
 | 
						|
                            $entries_to_add{$file} = 1 if $file =~ m{$f_or_d/};
 | 
						|
                        }
 | 
						|
                    } else {
 | 
						|
                        # exact match
 | 
						|
                        $entries_to_add{$f_or_d} = 1;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                %files_union = %entries_to_add; # and exclude all others
 | 
						|
            }
 | 
						|
 | 
						|
            # then make truncated tar files of those union files which
 | 
						|
            # actually exist in each repo
 | 
						|
            foreach my $file (sort keys %files_union) {
 | 
						|
                push @left_files , $file if $repo_listing{$Left }{$file};
 | 
						|
                push @right_files, $file if $repo_listing{$Right}{$file};
 | 
						|
            }
 | 
						|
            # backslash whitespace, weird chars within file names (#257, #284)
 | 
						|
 | 
						|
#           my @Lfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_}   @left_files;
 | 
						|
#           my @Lfiles= @left_files;
 | 
						|
            if(scalar(@left_files) > 0) {
 | 
						|
                $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left , \@left_files);
 | 
						|
            } else {
 | 
						|
                # In the right side commit ONLY file(s) was added, so no file(s) will exist in the left side commit.
 | 
						|
                # Create empty TAR to detect added lines of code.
 | 
						|
                $replacement_arg_list{$git_hash{$Left}} = empty_tar();
 | 
						|
            }
 | 
						|
#           $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left , \@Lfiles);
 | 
						|
#           my @Rfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_}   @right_files ;
 | 
						|
#           my @Rfiles= @right_files ;
 | 
						|
#use Data::Dumper;
 | 
						|
#print Dumper('left' , \@left_files);
 | 
						|
#print Dumper('right', \@right_files);
 | 
						|
#die;
 | 
						|
 | 
						|
            if(scalar(@right_files) > 0) {
 | 
						|
                $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@right_files);
 | 
						|
            } else {
 | 
						|
                 # In the left side commit ONLY file(s) was deleted, so file(s) will not exist in right side commit.
 | 
						|
                 # Create empty TAR to detect removed lines of code.
 | 
						|
                 $replacement_arg_list{$git_hash{$Right}} = empty_tar();
 | 
						|
            }
 | 
						|
#           $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@Rfiles);
 | 
						|
#write_file("/tmp/Lfiles.txt", {}, sort @Lfiles);
 | 
						|
#write_file("/tmp/Rfiles.txt", {}, sort @Rfiles);
 | 
						|
#write_file("/tmp/files_union.txt", {}, sort keys %files_union);
 | 
						|
 | 
						|
        } else {
 | 
						|
#print "A DIFF git-to-file or file-to-git Left=$Left Right=$Right\n";
 | 
						|
            # git to file/dir or file/dir to git
 | 
						|
            if      ($git_hash{$Left}  and $repo_listing{$Left}{$Right} ) {
 | 
						|
#print "A DIFF 1\n";
 | 
						|
                # $Left is a git hash and $Right is a file
 | 
						|
                $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left, $Right);
 | 
						|
            } elsif ($git_hash{$Right} and $repo_listing{$Right}{$Left}) {
 | 
						|
#print "A DIFF 2\n";
 | 
						|
                # $Left is a file and $Right is a git hash
 | 
						|
                $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, $Left);
 | 
						|
            } elsif ($git_hash{$Left}) {
 | 
						|
#print "A DIFF 3\n";
 | 
						|
                # assume Right is a directory; tar the entire git archive at this hash
 | 
						|
                $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left, "");
 | 
						|
            } else {
 | 
						|
#print "A DIFF 4\n";
 | 
						|
                # assume Left  is a directory; tar the entire git archive at this hash
 | 
						|
                $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, "");
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
#print "B COUNT\n";
 | 
						|
        foreach my $hash (sort keys %git_hash) {
 | 
						|
            $replacement_arg_list{$git_hash{$hash}} = git_archive($hash);
 | 
						|
        }
 | 
						|
    }
 | 
						|
# print "git_hash= "; print Dumper(\%git_hash);
 | 
						|
#print "repo_listing= "; print Dumper(\%repo_listing);
 | 
						|
 | 
						|
    # replace the input arg list with the new one
 | 
						|
    @{$ra_arg_list} = ();
 | 
						|
    foreach my $index (sort {$a <=> $b} keys %replacement_arg_list) {
 | 
						|
        push @{$ra_arg_list}, $replacement_arg_list{$index};
 | 
						|
    }
 | 
						|
 | 
						|
#print "ra_arg_list 2: @{$ra_arg_list}\n";
 | 
						|
    print "<- replace_git_hash_with_tarfile()\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub git_similarity_index {                   # {{{
 | 
						|
    my ($git_hash_Left    ,       # in
 | 
						|
        $git_hash_Right   ,       # in
 | 
						|
        $ra_left_files    ,       # out
 | 
						|
        $ra_right_files   ,       # out
 | 
						|
        $ra_git_similarity) = @_; # out
 | 
						|
    die "this option is not yet implemented";
 | 
						|
    print "-> git_similarity_index($git_hash_Left, $git_hash_Right)\n" if $opt_v > 2;
 | 
						|
    my $cmd = "git diff -M --name-status $git_hash_Left $git_hash_Right";
 | 
						|
    print  $cmd, "\n" if $opt_v;
 | 
						|
    open(GSIM, "$cmd |") or die "Unable to run $cmd  $!";
 | 
						|
    while (<GSIM>) {
 | 
						|
        print "git similarity> $_";
 | 
						|
    }
 | 
						|
    close(GSIM);
 | 
						|
    print "<- git_similarity_index\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub empty_tar {                              # {{{1
 | 
						|
    my ($Tarfh, $Tarfile);
 | 
						|
    if ($opt_sdir) {
 | 
						|
      File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
 | 
						|
      ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar');  # delete on exit
 | 
						|
    } else {
 | 
						|
      ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar');  # delete on exit
 | 
						|
    }
 | 
						|
    my $cmd = $ON_WINDOWS ? "type nul > $Tarfile" : "tar -cf $Tarfile -T /dev/null";
 | 
						|
    print  $cmd, "\n" if $opt_v;
 | 
						|
    system $cmd;
 | 
						|
    if (!can_read($Tarfile)) {
 | 
						|
        # not readable
 | 
						|
        die "Failed to create empty tarfile.";
 | 
						|
    }
 | 
						|
 | 
						|
    return $Tarfile;
 | 
						|
} # 1}}}
 | 
						|
sub git_archive {                            # {{{1
 | 
						|
    # Invoke 'git archive' as a system command to create a tar file
 | 
						|
    # using the given argument(s).
 | 
						|
    my ($A1, $A2) = @_;
 | 
						|
    print "-> git_archive($A1)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $args = undef;
 | 
						|
    my @File_Set = ( );
 | 
						|
    my $n_sets   = 1;
 | 
						|
    if (ref $A2 eq 'ARRAY') {
 | 
						|
        # Avoid "Argument list too long" for the git archive command
 | 
						|
        # by splitting the inputs into sets of 10,000 files (issue 273).
 | 
						|
        my $FILES_PER_ARCHIVE = 1_000;
 | 
						|
           $FILES_PER_ARCHIVE =   100 if $ON_WINDOWS; # github.com/AlDanial/cloc/issues/404
 | 
						|
 | 
						|
        my $n_files  = scalar(@{$A2});
 | 
						|
        $n_sets = $n_files/$FILES_PER_ARCHIVE;
 | 
						|
        $n_sets = 1 + int($n_sets) if $n_sets > int($n_sets);
 | 
						|
        $n_sets = 1 if !$n_sets;
 | 
						|
        foreach my $i (0..$n_sets-1) {
 | 
						|
            @{$File_Set[$i]} = ( );
 | 
						|
            my $start = $i*$FILES_PER_ARCHIVE;
 | 
						|
            my $end   = smaller(($i+1)*$FILES_PER_ARCHIVE, $n_files) - 1;
 | 
						|
            # Wrap each file name in single quotes to protect spaces
 | 
						|
            # and other odd characters.  File names that themselves have
 | 
						|
            # single quotes are instead wrapped in double quotes.  File
 | 
						|
            # names with both single and double quotes... jeez.
 | 
						|
            foreach my $fname (@{$A2}[$start .. $end]) {
 | 
						|
                if      ($fname =~ /^".*?\\".*?"$/) {
 | 
						|
                    # git pre-handles filenames with double quotes by backslashing
 | 
						|
                    # each double quote then surrounding entire name in double
 | 
						|
                    # quotes; undo this otherwise archive command crashes
 | 
						|
                    $fname =~ s/\\"/"/g;
 | 
						|
                    $fname =~ s/^"(.*)"$/$1/;
 | 
						|
                } elsif ($fname =~ /'/ or $ON_WINDOWS) {
 | 
						|
                    push @{$File_Set[$i]}, "\"$fname\"";
 | 
						|
                } else {
 | 
						|
                    push @{$File_Set[$i]}, "'$fname'";
 | 
						|
                }
 | 
						|
            }
 | 
						|
            unshift @{$File_Set[$i]}, "$A1 ";  # prepend git hash to beginning of list
 | 
						|
 | 
						|
##xx#        # don't include \$ in the regex because git handles these correctly
 | 
						|
#            # to each word in @{$A2}[$start .. $end]: first backslash each
 | 
						|
#            # single quote, then wrap all entries in single quotes (#320)
 | 
						|
#            push @File_Set,
 | 
						|
#                 "$A1 " . join(" ", map {$_ =~ s/'/\'/g; $_ =~ s/^(.*)$/'$1'/g; $_}
 | 
						|
##                "$A1 " . join(" ", map {$_ =~ s/([\s\(\)\[\]{}';\^\?])/\\$1/g; $_}
 | 
						|
#                              @{$A2}[$start .. $end]);
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        if (defined $A2) {
 | 
						|
            push @{$File_Set[0]}, "$A1 $A2";
 | 
						|
        } else {
 | 
						|
            push @{$File_Set[0]}, "$A1";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my $files_this_commit = join(" ", @{$File_Set[0]});
 | 
						|
    print "   git_archive(file_set[0]=$files_this_commit)\n" if $opt_v > 2;
 | 
						|
    my ($Tarfh, $Tarfile);
 | 
						|
    if ($opt_sdir) {
 | 
						|
      File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
 | 
						|
      ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => '.tar');  # delete on exit
 | 
						|
    } else {
 | 
						|
      ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => '.tar');  # delete on exit
 | 
						|
    }
 | 
						|
    my $cmd = "git archive -o $Tarfile $files_this_commit";
 | 
						|
    print  $cmd, "\n" if $opt_v;
 | 
						|
    system $cmd;
 | 
						|
    if (!can_read($Tarfile) or !get_size($Tarfile)) {
 | 
						|
        # not readable, or zero sized
 | 
						|
        die "Failed to create tarfile of files from git.";
 | 
						|
    }
 | 
						|
    if ($n_sets > 1) {
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            my @tar_files = ( $Tarfile );
 | 
						|
            my $start_dir = cwd;
 | 
						|
            foreach my $i (1..$n_sets-1) {
 | 
						|
                my $fname = sprintf "%s_extra_%08d", $Tarfile, $i;
 | 
						|
                my $files_this_commit = join(" ", @{$File_Set[$i]});
 | 
						|
                my $cmd = "git archive -o $fname $files_this_commit";
 | 
						|
                print  $cmd, "\n" if $opt_v;
 | 
						|
                system $cmd;
 | 
						|
                push @tar_files, $fname;
 | 
						|
            }
 | 
						|
            # Windows tar can't combine tar files so expand
 | 
						|
            # them all to one directory then re-tar
 | 
						|
            my $extract_dir = tempdir( CLEANUP => 0 );  # 1 = delete on exit
 | 
						|
            chdir "$extract_dir";
 | 
						|
            foreach my $T (@tar_files) {
 | 
						|
                next unless is_file($T) and get_size($T);
 | 
						|
                my $cmd = "tar -x -f \"$T\"";
 | 
						|
                print  $cmd, "\n" if $opt_v;
 | 
						|
                system $cmd;
 | 
						|
                unlink "$T";
 | 
						|
            }
 | 
						|
            chdir "..";
 | 
						|
            $Tarfile .= ".final.tar";
 | 
						|
            my $cmd = "tar -c -f \"${Tarfile}\" \"$extract_dir\"";
 | 
						|
            print  $cmd, "\n" if $opt_v;
 | 
						|
            system $cmd;
 | 
						|
            chdir "$start_dir";
 | 
						|
        } else {
 | 
						|
            foreach my $i (1..$n_sets-1) {
 | 
						|
                my $files_this_commit = join(" ", @{$File_Set[$i]});
 | 
						|
                my $cmd = "git archive -o ${Tarfile}_extra $files_this_commit";
 | 
						|
                print  $cmd, "\n" if $opt_v;
 | 
						|
                system $cmd;
 | 
						|
                # and merge into the first one
 | 
						|
                $cmd = "tar -A -f ${Tarfile} ${Tarfile}_extra";
 | 
						|
                print  $cmd, "\n" if $opt_v;
 | 
						|
                system $cmd;
 | 
						|
            }
 | 
						|
            unlink "${Tarfile}_extra";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- git_archive() made $Tarfile\n" if $opt_v > 2;
 | 
						|
    return $Tarfile
 | 
						|
} # 1}}}
 | 
						|
sub smaller {                                # {{{1
 | 
						|
    my( $a, $b ) = @_;
 | 
						|
    return $a < $b ? $a : $b;
 | 
						|
} # 1}}}
 | 
						|
sub lower_on_Windows {                       # {{{1
 | 
						|
    # If on Unix(-like), do nothing, just return the input.
 | 
						|
    # If on Windows, return a lowercase version of the file
 | 
						|
    # and also update %upper_lower_map with this new entry.
 | 
						|
    # Needed in make_file_list() because the full file list
 | 
						|
    # isn't known until the end of that routine--where
 | 
						|
    # %upper_lower_map is ordinarily populated.
 | 
						|
    my ($path,) = @_;
 | 
						|
    return $path unless $ON_WINDOWS;
 | 
						|
    my $lower = lc $path;
 | 
						|
    $upper_lower_map{$lower} = $path;
 | 
						|
    return $lower;
 | 
						|
}
 | 
						|
# }}}
 | 
						|
sub make_file_list {                         # {{{1
 | 
						|
    my ($ra_arg_list,  # in   file and/or directory names to examine
 | 
						|
        $iteration  ,  # in   0 if only called once, 1 or 2 if twice for diff
 | 
						|
        $rh_Err     ,  # in   hash of error codes
 | 
						|
        $raa_errors ,  # out  errors encountered
 | 
						|
        $rh_ignored ,  # out  files not recognized as computer languages
 | 
						|
        ) = @_;
 | 
						|
    print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | 
						|
    my ($fh, $filename);
 | 
						|
    if ($opt_categorized) {
 | 
						|
        if ($iteration) {
 | 
						|
            # am being called twice for diff of Left and Right
 | 
						|
            my $ext = $iteration == 1 ? "L" : "R";
 | 
						|
            $filename = $opt_categorized . "-$ext";
 | 
						|
        } else {
 | 
						|
            $filename = $opt_categorized;
 | 
						|
        }
 | 
						|
        $fh = open_file('+>', $filename, 1);  # open for read/write
 | 
						|
        die "Unable to write to $filename:  $!\n" unless defined $fh;
 | 
						|
    } elsif ($opt_sdir) {
 | 
						|
        # write to the user-defined scratch directory
 | 
						|
        ++$TEMP_OFF;
 | 
						|
        my $scr_dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
        File::Path::mkpath($scr_dir) unless is_dir($scr_dir);
 | 
						|
        $filename = $scr_dir . '/cloc_file_list.txt';
 | 
						|
        $fh = open_file('+>', $filename, 1);  # open for read/write
 | 
						|
        die "Unable to write to $filename:  $!\n" unless defined $fh;
 | 
						|
    } else {
 | 
						|
        # let File::Temp create a suitable temporary file
 | 
						|
        ($fh, $filename) = tempfile(UNLINK => 1);  # delete file on exit
 | 
						|
        print "Using temp file list [$filename]\n" if $opt_v;
 | 
						|
    }
 | 
						|
 | 
						|
    my @dir_list = ();
 | 
						|
    foreach my $file_or_dir (@{$ra_arg_list}) {
 | 
						|
        my $size_in_bytes = 0;
 | 
						|
        my $F = lower_on_Windows($file_or_dir);
 | 
						|
        my $ul_F = $ON_WINDOWS ? $upper_lower_map{$F} : $F;
 | 
						|
        if (!can_read($file_or_dir)) {
 | 
						|
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if (is_file($file_or_dir)) {
 | 
						|
            if (!get_size($file_or_dir)) {   # 0 sized file, named pipe, socket
 | 
						|
                $rh_ignored->{$F} = 'zero sized file';
 | 
						|
                next;
 | 
						|
            } elsif (is_binary($file_or_dir) and !$opt_read_binary_files) {
 | 
						|
                # avoid binary files unless user insists on reading them
 | 
						|
                if ($opt_unicode) {
 | 
						|
                    # only ignore if not a Unicode file w/trivial
 | 
						|
                    # ASCII transliteration
 | 
						|
                    if (!unicode_file($file_or_dir)) {
 | 
						|
                        $rh_ignored->{$ul_F} = 'binary file';
 | 
						|
                        next;
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    $rh_ignored->{$ul_F} = 'binary file';
 | 
						|
                    next;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            push @file_list, "$file_or_dir";
 | 
						|
        } elsif (is_dir($file_or_dir)) {
 | 
						|
            push @dir_list, $file_or_dir;
 | 
						|
        } else {
 | 
						|
            push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $F];
 | 
						|
            $rh_ignored->{$F} = 'not file, not directory';
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # apply exclusion rules to file names passed in on the command line
 | 
						|
    my @new_file_list = ();
 | 
						|
    foreach my $File (@file_list) {
 | 
						|
        my ($volume, $directories, $filename) = File::Spec->splitpath( $File );
 | 
						|
        my $ignore_this_file = 0;
 | 
						|
        foreach my $Sub_Dir ( File::Spec->splitdir($directories) ) {
 | 
						|
            my $SD = lower_on_Windows($Sub_Dir);
 | 
						|
            if ($Exclude_Dir{$Sub_Dir}) {
 | 
						|
                $Ignored{$SD} = "($File) --exclude-dir=$Sub_Dir";
 | 
						|
                $ignore_this_file = 1;
 | 
						|
                last;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        push @new_file_list, $File unless $ignore_this_file;
 | 
						|
    }
 | 
						|
    @file_list = @new_file_list;
 | 
						|
    foreach my $dir (@dir_list) {
 | 
						|
        my $D = lower_on_Windows($dir);
 | 
						|
#print "make_file_list dir=$dir  Exclude_Dir{$dir}=$Exclude_Dir{$dir}\n";
 | 
						|
        # populates global variable @file_list
 | 
						|
        if ($Exclude_Dir{$dir}) {
 | 
						|
            $Ignored{$D} = "--exclude-dir=$Exclude_Dir{$dir}";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($opt_no_recurse) {
 | 
						|
            if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
                my $d = Win32::LongPath->new();
 | 
						|
                $d->opendirL($dir);
 | 
						|
                foreach my $entry ($d->readdirL()) {
 | 
						|
                    my $F = "$dir/$entry";
 | 
						|
                    push @file_list, $F if is_file($F);
 | 
						|
                }
 | 
						|
                $d->closedirL();
 | 
						|
            } else {
 | 
						|
                opendir(DIR, $dir);
 | 
						|
                push @file_list, grep(is_file($_), readdir(DIR));
 | 
						|
                closedir(DIR);
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            find({wanted     => \&files            ,
 | 
						|
                  preprocess => \&find_preprocessor,
 | 
						|
                  follow     =>  $opt_follow_links }, $dir);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if ($opt_follow_links) {
 | 
						|
        # giving { 'follow' => 1 } to find() makes it skip the
 | 
						|
        # call to find_preprocessor() so have to call this manually
 | 
						|
        @file_list = manual_find_preprocessor(@file_list);
 | 
						|
    }
 | 
						|
 | 
						|
    # there's a possibility of file duplication if user provided a list
 | 
						|
    # file or --vcs command that returns directory names; squash these
 | 
						|
    my %unique_file_list = map { $_ => 1 } @file_list;
 | 
						|
    @file_list = sort keys %unique_file_list;
 | 
						|
 | 
						|
    $nFiles_Found = scalar @file_list;
 | 
						|
    printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
 | 
						|
    write_file($opt_found, {}, sort @file_list) if $opt_found;
 | 
						|
 | 
						|
    my $nFiles_Categorized = 0;
 | 
						|
 | 
						|
    foreach my $file (@file_list) {
 | 
						|
        my $F = lower_on_Windows($file);
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            (my $lc = lc $file) =~ s{\\}{/}g;
 | 
						|
            $upper_lower_map{$lc} = $file;
 | 
						|
            $file = $lc;
 | 
						|
        }
 | 
						|
        printf "classifying $file\n" if $opt_v > 2;
 | 
						|
 | 
						|
        my $basename = basename $file;
 | 
						|
        if ($Not_Code_Filename{$basename}) {
 | 
						|
            $rh_ignored->{$F} = "listed in " . '$' .
 | 
						|
                "Not_Code_Filename{$basename}";
 | 
						|
            next;
 | 
						|
        } elsif ($basename =~ m{~$}) {
 | 
						|
            $rh_ignored->{$F} = "temporary editor file";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $size_in_bytes;
 | 
						|
        if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
            my $stats = statL($file);
 | 
						|
            $size_in_bytes = $stats->{size} if defined $stats;
 | 
						|
        } else {
 | 
						|
            $size_in_bytes = (stat $file)[7];
 | 
						|
        }
 | 
						|
        my $language      = "";
 | 
						|
        if ($All_One_Language) {
 | 
						|
            # user over-rode auto-language detection by using
 | 
						|
            # --force-lang with just a language name (no extension)
 | 
						|
            $language      = $All_One_Language;
 | 
						|
        } else {
 | 
						|
            $language      = classify_file($file      ,
 | 
						|
                                           $rh_Err    ,
 | 
						|
                                           $raa_errors,
 | 
						|
                                           $rh_ignored);
 | 
						|
        }
 | 
						|
        if (!defined $size_in_bytes) {
 | 
						|
            $rh_ignored->{$F} = "no longer readable";
 | 
						|
            next;
 | 
						|
        } elsif (!defined $language) {
 | 
						|
            $rh_ignored->{$F} = "unable to associate with a language";
 | 
						|
            next;
 | 
						|
        } elsif ($language eq "(unknown)") {
 | 
						|
            # entry should already be in %{$rh_ignored}
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        printf $fh "%d%s%s%s%s\n", $size_in_bytes, $separator,
 | 
						|
                                   $language, $separator, $file;
 | 
						|
        ++$nFiles_Categorized;
 | 
						|
        #printf "classified %d files\n", $nFiles_Categorized
 | 
						|
        #    unless (!$opt_progress_rate or
 | 
						|
        #            ($nFiles_Categorized % $opt_progress_rate));
 | 
						|
    }
 | 
						|
    printf "classified %d files\r", $nFiles_Categorized
 | 
						|
        if !$opt_quiet and $nFiles_Categorized > 1;
 | 
						|
    print "<- make_file_list()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    return $fh;   # handle to the file containing the list of files to process
 | 
						|
}  # 1}}}
 | 
						|
sub invoke_generator {                       # {{{1
 | 
						|
    my ($generator, $ra_user_inputs) = @_;
 | 
						|
    # If user provided file/directory inputs, only return
 | 
						|
    # generated files that are in user's request.
 | 
						|
    # Populates global variable %Ignored.
 | 
						|
    print "-> invoke_generator($generator, @{$ra_user_inputs})\n" if $opt_v > 2;
 | 
						|
    my $start_dir = cwd();
 | 
						|
    my @dir_list  = ();
 | 
						|
    my @file_list = ();
 | 
						|
 | 
						|
    if (!@{$ra_user_inputs}) {
 | 
						|
        # input must be a generator command, ie "find -type f -name '*.c'"
 | 
						|
        # issued from the cwd
 | 
						|
        push @dir_list, ".";
 | 
						|
    }
 | 
						|
    foreach my $file_dir (@{$ra_user_inputs}) {
 | 
						|
        if (is_dir($file_dir)) {
 | 
						|
            push @dir_list, $file_dir;
 | 
						|
        } elsif (is_file($file_dir)) {
 | 
						|
            push @file_list, $file_dir;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my @files = ();
 | 
						|
    foreach my $work_dir (@dir_list) {
 | 
						|
        if ($work_dir ne $start_dir) {
 | 
						|
            chdir $work_dir or die "Failed to chdir to $work_dir: $!";
 | 
						|
        }
 | 
						|
        open(FH, "$generator |") or die "Failed to pipe $generator: $!";
 | 
						|
        while(<FH>) {
 | 
						|
            chomp;
 | 
						|
            my $F = "$work_dir/$_";
 | 
						|
            print "VCS input:  $F\n" if $opt_v >= 2;
 | 
						|
            if (!@file_list) {
 | 
						|
                # no files given on the command line; accept all
 | 
						|
                push @files, $F;
 | 
						|
            } else {
 | 
						|
                # is this file desired?
 | 
						|
                my $want_this_one = 0;
 | 
						|
                foreach my $file (@file_list) {
 | 
						|
                    $file =~ s{\\}{/}g if $ON_WINDOWS;
 | 
						|
                    if (/^$file/) {
 | 
						|
                        $want_this_one = 1;
 | 
						|
                        last;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                push @files, $F if $want_this_one;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        close(FH);
 | 
						|
        if ($work_dir ne $start_dir) {
 | 
						|
            chdir $start_dir or die "Failed to chdir to $start_dir: $!";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # apply match/not-match file/dir filters to the list so far
 | 
						|
    my @post_filter = ();
 | 
						|
    foreach my $F (@files) {
 | 
						|
        if ($opt_match_f) {
 | 
						|
            push @post_filter, $F if basename($F) =~ m{$opt_match_f};
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($opt_match_d) {
 | 
						|
            push @post_filter, $F if $F =~ m{$opt_match_d};
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if (@opt_not_match_d) {
 | 
						|
            my $rule;
 | 
						|
            if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_d)) {
 | 
						|
                $Ignored{$F} = "--not-match-d=$rule";
 | 
						|
                next;
 | 
						|
            } elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_d)) {
 | 
						|
                $Ignored{$F} = "--not-match-d (basename) =$rule";
 | 
						|
                next;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (@opt_not_match_f) {
 | 
						|
            my $rule;
 | 
						|
            if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_f)) {
 | 
						|
                $Ignored{$F} = "--not-match-f=$rule";
 | 
						|
                next;
 | 
						|
            } elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_f)) {
 | 
						|
                $Ignored{$F} = "--not-match-f (basename) =$rule";
 | 
						|
                next;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        my $nBytes = get_size($F);
 | 
						|
        if (!$nBytes) {
 | 
						|
            $Ignored{$F} = 'zero sized file';
 | 
						|
            printf "files(%s)  zero size\n", $F if $opt_v > 5;
 | 
						|
        }
 | 
						|
        next unless $nBytes;
 | 
						|
        if ($nBytes > $opt_max_file_size*1024**2) {
 | 
						|
            $Ignored{$F} = "file size of " .
 | 
						|
                $nBytes/1024**2 . " MB exceeds max file size of " .
 | 
						|
                "$opt_max_file_size MB";
 | 
						|
            printf "file(%s)  exceeds $opt_max_file_size MB\n",
 | 
						|
                $F if $opt_v > 5;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        my $is_bin = is_binary($F);
 | 
						|
        printf "files(%s)  size=%d  -B=%d\n",
 | 
						|
            $F, $nBytes, $is_bin if $opt_v > 5;
 | 
						|
        $is_bin = 0 if $opt_unicode and unicode_file($_);
 | 
						|
        $is_bin = 0 if $opt_read_binary_files;
 | 
						|
        next if $is_bin;
 | 
						|
        push @post_filter, $F;
 | 
						|
    }
 | 
						|
    print "<- invoke_generator\n" if $opt_v > 2;
 | 
						|
    return @post_filter;
 | 
						|
} # 1}}}
 | 
						|
sub any_match {                              # {{{1
 | 
						|
    my ($string, $entire, $rs_matched_pattern, @patterns) = @_;
 | 
						|
    print "-> any_match($string, $entire)\n" if $opt_v > 2;
 | 
						|
    foreach my $pattern (@patterns) {
 | 
						|
        if ($entire) {
 | 
						|
            if ($string =~ m{^${pattern}$}) {
 | 
						|
                ${$rs_matched_pattern} = $pattern;
 | 
						|
                print "<- any_match(1)\n" if $opt_v > 2;
 | 
						|
                return 1;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            if ($string =~ m{$pattern}) {
 | 
						|
                ${$rs_matched_pattern} = $pattern;
 | 
						|
                print "<- any_match(1)\n" if $opt_v > 2;
 | 
						|
                return 1;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- any_match(0)\n" if $opt_v > 2;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
# }}}
 | 
						|
sub remove_duplicate_files {                 # {{{1
 | 
						|
    my ($fh                   , # in
 | 
						|
        $rh_Language          , # out
 | 
						|
        $rh_unique_source_file, # out
 | 
						|
        $rh_Err               , # in
 | 
						|
        $raa_errors           , # out  errors encountered
 | 
						|
        $rh_ignored           , # out
 | 
						|
        ) = @_;
 | 
						|
 | 
						|
    # Check for duplicate files by comparing file sizes.
 | 
						|
    # Where files are equally sized, compare their MD5 checksums.
 | 
						|
    print "-> remove_duplicate_files\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | 
						|
    my $n = 0;
 | 
						|
    my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
 | 
						|
    seek($fh, 0, 0); # rewind to beginning of the temp file
 | 
						|
    while (<$fh>) {
 | 
						|
        ++$n;
 | 
						|
        my ($size_in_bytes, $language, $file) = split(/\Q$separator\E/, $_, 3);
 | 
						|
        if (!defined($size_in_bytes) or
 | 
						|
            !defined($language)      or
 | 
						|
            !defined($file)) {
 | 
						|
            print "-> remove_duplicate_files skipping error line [$_]\n"
 | 
						|
                if $opt_v;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        chomp($file);
 | 
						|
        $file =~ s{\\}{/}g if $ON_WINDOWS;
 | 
						|
        $rh_Language->{$file} = $language;
 | 
						|
        push @{$files_by_size{$size_in_bytes}}, $file;
 | 
						|
        if ($opt_skip_uniqueness) {
 | 
						|
            $rh_unique_source_file->{$file} = 1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return if $opt_skip_uniqueness;
 | 
						|
    if ($opt_progress_rate and ($n > $opt_progress_rate)) {
 | 
						|
        printf "Duplicate file check %d files (%d known unique)\r",
 | 
						|
            $n, scalar keys %files_by_size;
 | 
						|
    }
 | 
						|
    $n = 0;
 | 
						|
    foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
 | 
						|
        ++$n;
 | 
						|
        printf "Unique: %8d files                                          \r",
 | 
						|
            $n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
 | 
						|
        if (scalar @{$files_by_size{$bytes}} == 1) {
 | 
						|
            # only one file is this big; must be unique
 | 
						|
            $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
 | 
						|
            next;
 | 
						|
        } else {
 | 
						|
#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
 | 
						|
            # Files in the list @{$files_by_size{$bytes} all are
 | 
						|
            # $bytes long.  Sort the list by file basename.
 | 
						|
 | 
						|
          # # sorting on basename causes repeatability problems
 | 
						|
          # # if the basename is not unique (eg "includeA/x.h"
 | 
						|
          # # and "includeB/x.h".  Instead, sort on full path.
 | 
						|
          # # Ref bug #114.
 | 
						|
          # my @sorted_bn = ();
 | 
						|
          # my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
 | 
						|
          # foreach my $F (sort keys %BN) {
 | 
						|
          #     push @sorted_bn, $BN{$F};
 | 
						|
          # }
 | 
						|
 | 
						|
            my @sorted_bn = sort @{$files_by_size{$bytes}};
 | 
						|
 | 
						|
            foreach my $F (different_files(\@sorted_bn  ,
 | 
						|
                                            $rh_Err     ,
 | 
						|
                                            $raa_errors ,
 | 
						|
                                            $rh_ignored ) ) {
 | 
						|
                $rh_unique_source_file->{$F} = 1;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- remove_duplicate_files\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub manual_find_preprocessor {               # {{{1
 | 
						|
    # When running with --follow-links, find_preprocessor() is not
 | 
						|
    # called by find().  Have to do it manually.  Inputs here
 | 
						|
    # are only files, which differs from find_preprocessor() which
 | 
						|
    # gets directories too.
 | 
						|
    # Reads global variable %Exclude_Dir.
 | 
						|
    # Populates global variable %Ignored.
 | 
						|
    # Reject files/directories in cwd which are in the exclude list.
 | 
						|
    print "-> manual_find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
 | 
						|
    my @ok = ();
 | 
						|
 | 
						|
    foreach my $File (@_) {  # pure file or directory name, no separators
 | 
						|
        my $Dir = dirname($File);
 | 
						|
        my $got_exclude_dir = 0;
 | 
						|
        foreach my $d (File::Spec->splitdir( $Dir )) {
 | 
						|
            # tests/inputs/issues/407/level2/level/Test/level2 ->
 | 
						|
            # $d iterates over tests, inputs, issues, 407,
 | 
						|
            #                  level2, level, Test, level2
 | 
						|
            # check every item against %Exclude_Dir
 | 
						|
            if ($Exclude_Dir{$d}) {
 | 
						|
                $got_exclude_dir = $d;
 | 
						|
                last;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if ($got_exclude_dir) {
 | 
						|
            $Ignored{$File} = "--exclude-dir=$Exclude_Dir{$got_exclude_dir}";
 | 
						|
#print "ignoring $File\n";
 | 
						|
        } else {
 | 
						|
            if (@opt_not_match_d) {
 | 
						|
                my $rule;
 | 
						|
                if ($opt_fullpath) {
 | 
						|
                    if (any_match($Dir, 1, \$rule, @opt_not_match_d)) {
 | 
						|
                        $Ignored{$File} = "--not-match-d=$rule";
 | 
						|
#print "matched fullpath\n"
 | 
						|
                    } else {
 | 
						|
                        push @ok, $File;
 | 
						|
                    }
 | 
						|
                } elsif (any_match(basename($Dir), 0, \$rule, @opt_not_match_d)) {
 | 
						|
                    $Ignored{$File} = "--not-match-d=$rule";
 | 
						|
#print "matched partial\n"
 | 
						|
                } else {
 | 
						|
                    push @ok, $File;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                push @ok, $File;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- manual_find_preprocessor(@ok)\n" if $opt_v > 2;
 | 
						|
    return @ok;
 | 
						|
} # 1}}}
 | 
						|
sub find_preprocessor {                      # {{{1
 | 
						|
    # invoked by File::Find's find() each time it enters a new directory
 | 
						|
    # Reads global variable %Exclude_Dir.
 | 
						|
    # Populates global variable %Ignored.
 | 
						|
    # Reject files/directories in cwd which are in the exclude list.
 | 
						|
    print "-> find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
 | 
						|
    my @ok = ();
 | 
						|
 | 
						|
    foreach my $F_or_D (@_) {  # pure file or directory name, no separators
 | 
						|
        next if $F_or_D =~ /^\.{1,2}$/;  # skip .  and  ..
 | 
						|
        if ($Exclude_Dir{$F_or_D}) {
 | 
						|
            $Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
 | 
						|
        } else {
 | 
						|
#printf "  F_or_D=%-20s File::Find::name=%s\n", $F_or_D, $File::Find::name;
 | 
						|
            if (@opt_not_match_d) {
 | 
						|
                my $rule;
 | 
						|
                if ($opt_fullpath) {
 | 
						|
                    if (any_match($File::Find::name, 0, \$rule, @opt_not_match_d)) {
 | 
						|
                        $Ignored{$File::Find::name} = "--not-match-d=$rule";
 | 
						|
                    } else {
 | 
						|
                        push @ok, $F_or_D;
 | 
						|
                    }
 | 
						|
                } elsif (!is_dir($F_or_D) and
 | 
						|
                         any_match($File::Find::name, 0, \$rule,
 | 
						|
                                   @opt_not_match_d)) {
 | 
						|
                    $Ignored{$File::Find::name} = "--not-match-d (basename) =$rule";
 | 
						|
                } else {
 | 
						|
                    push @ok, $F_or_D;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                push @ok, $F_or_D;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- find_preprocessor(@ok)\n" if $opt_v > 2;
 | 
						|
    return @ok;
 | 
						|
} # 1}}}
 | 
						|
sub files {                                  # {{{1
 | 
						|
    # invoked by File::Find's find()   Populates global variable @file_list.
 | 
						|
    # See also find_preprocessor() which prunes undesired directories.
 | 
						|
 | 
						|
#   my $Dir = fastcwd();         # fully qualified path -- problematic
 | 
						|
    my $Dir = $File::Find::dir;  # path anchored to cloc's cwd
 | 
						|
    my $rule;
 | 
						|
    if ($opt_fullpath) {
 | 
						|
        # look at as much of the path as is known
 | 
						|
        if ($opt_match_f    ) {
 | 
						|
            return unless $File::Find::name =~ m{$opt_match_f};
 | 
						|
        }
 | 
						|
        if (@opt_not_match_f) {
 | 
						|
            return if any_match($File::Find::name, 0, \$rule, @opt_not_match_f);
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        # only look at the basename
 | 
						|
        if ($opt_match_f    ) { return unless /$opt_match_f/;     }
 | 
						|
        if (@opt_not_match_f) { return if     any_match($_, 0, \$rule, @opt_not_match_f)}
 | 
						|
    }
 | 
						|
    if ($opt_match_d) {
 | 
						|
        return unless "$Dir/" =~ m{$opt_match_d} or $Dir =~ m{$opt_match_d};
 | 
						|
    }
 | 
						|
 | 
						|
    my $nBytes = get_size($_);
 | 
						|
    if (!$nBytes and is_file($File::Find::name)) {
 | 
						|
        $Ignored{$File::Find::name} = 'zero sized file';
 | 
						|
        printf "files(%s)  zero size\n", $File::Find::name if $opt_v > 5;
 | 
						|
    }
 | 
						|
    return unless $nBytes  ; # attempting other tests w/pipe or socket will hang
 | 
						|
    if ($nBytes > $opt_max_file_size*1024**2) {
 | 
						|
        $Ignored{$File::Find::name} = "file size of " .
 | 
						|
            $nBytes/1024**2 . " MB exceeds max file size of " .
 | 
						|
            "$opt_max_file_size MB";
 | 
						|
        printf "file(%s)  exceeds $opt_max_file_size MB\n",
 | 
						|
            $File::Find::name if $opt_v > 5;
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    my $is_dir = is_dir($_);
 | 
						|
    my $is_bin = is_binary($_);
 | 
						|
    printf "files(%s)  size=%d is_dir=%d  -B=%d\n",
 | 
						|
        $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
 | 
						|
    $is_bin = 0 if $opt_unicode and unicode_file($_);
 | 
						|
    $is_bin = 0 if $opt_read_binary_files;
 | 
						|
    if ($is_bin and !$is_dir) {
 | 
						|
        $Ignored{$File::Find::name} = "binary file";
 | 
						|
        printf "files(%s)  binary file\n", $File::Find::name if $opt_v > 5;
 | 
						|
    }
 | 
						|
    return if $is_dir or $is_bin;
 | 
						|
    ++$nFiles_Found;
 | 
						|
    printf "%8d files\r", $nFiles_Found
 | 
						|
        unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
 | 
						|
    push @file_list, $File::Find::name;
 | 
						|
} # 1}}}
 | 
						|
sub archive_files {                          # {{{1
 | 
						|
    # invoked by File::Find's find()  Populates global variable @binary_archive
 | 
						|
    foreach my $ext (keys %Known_Binary_Archives) {
 | 
						|
        push @binary_archive, $File::Find::name
 | 
						|
            if $File::Find::name =~ m{$ext$};
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub open_file {                              # {{{1
 | 
						|
    # portable method to open a file. On Windows this uses Win32::LongPath to
 | 
						|
    # allow reading/writing files past the 255 char path length limit. When on
 | 
						|
    # other operating systems, $use_new_file can be used to specify opening a
 | 
						|
    # file with `new IO::File` instead of `open`. Note: `openL` doesn't support
 | 
						|
    # the C-like fopen modes ("w", "r+", etc.), it only supports Perl mode
 | 
						|
    # strings (">", "+<", etc.). So be sure to only use Perl mode strings to
 | 
						|
    # ensure compatibility. Additionally, openL doesn't handle pipe modes; if
 | 
						|
    # you need to open a pipe/STDIN/STDOUT, use the native `open` function.
 | 
						|
    my ($mode,         # Perl file mode; can not be C-style file mode
 | 
						|
        $filename,     # filename to open
 | 
						|
        $use_new_file, # whether to use `new IO::File` or `open` when not using Win32::LongPath
 | 
						|
        ) = @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        my $file = undef;
 | 
						|
        openL(\$file, $mode, $filename);
 | 
						|
        return $file;
 | 
						|
    } elsif ($use_new_file) {
 | 
						|
        return new IO::File $filename, $mode;
 | 
						|
    }
 | 
						|
    my $file = undef;
 | 
						|
    open($file, $mode, $filename);
 | 
						|
    return $file;
 | 
						|
} # 1}}}
 | 
						|
sub unlink_file {                            # {{{1
 | 
						|
    # portable method to unlink a file. On Windows this uses Win32::LongPath to
 | 
						|
    # allow unlinking files past the 255 char path length limit. Otherwise, the
 | 
						|
    # native `unlink` will be used.
 | 
						|
    my $filename = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        return unlinkL($filename);
 | 
						|
    }
 | 
						|
    return unlink $filename;
 | 
						|
} # 1}}}
 | 
						|
sub is_binary {                              # {{{1
 | 
						|
    # portable method to test if item is a binary file. For Windows,
 | 
						|
    # Win32::LongPath doesn't provide a testL option for -B, but -B
 | 
						|
    # accepts a filehandle which does work with files opened with openL.
 | 
						|
    my $item = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        my $IN = open_file('<', $item, 0);
 | 
						|
        if (defined $IN) {
 | 
						|
            my $res = -B $IN;
 | 
						|
            close($IN);
 | 
						|
            return $res;
 | 
						|
        }
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    return (-B $item);
 | 
						|
} # 1}}}
 | 
						|
sub can_read {                               # {{{1
 | 
						|
    # portable method to test if item can be read
 | 
						|
    my $item = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        return testL('r', $item);
 | 
						|
    }
 | 
						|
    return (-r $item);
 | 
						|
} # 1}}}
 | 
						|
sub get_size {                               # {{{1
 | 
						|
    # portable method to get size in bytes of a file
 | 
						|
    my $filename = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        return testL('s', $filename);
 | 
						|
    }
 | 
						|
    return (-s $filename);
 | 
						|
} # 1}}}
 | 
						|
sub is_file {                                # {{{1
 | 
						|
    # portable method to test if item is a file
 | 
						|
    # (-f doesn't work in ActiveState Perl on Windows)
 | 
						|
    my $item = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        return testL('f', $item);
 | 
						|
    }
 | 
						|
    return (-f $item);
 | 
						|
 | 
						|
#     Was:
 | 
						|
####if ($ON_WINDOWS) {
 | 
						|
####    my $mode = (stat $item)[2];
 | 
						|
####       $mode = 0 unless $mode;
 | 
						|
####    if ($mode & 0100000) { return 1; }
 | 
						|
####    else                 { return 0; }
 | 
						|
####} else {
 | 
						|
####    return (-f $item);  # works on Unix, Linux, CygWin, z/OS
 | 
						|
####}
 | 
						|
} # 1}}}
 | 
						|
sub is_dir {                                 # {{{1
 | 
						|
    my $item = shift @_;
 | 
						|
    if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | 
						|
        return testL('d', $item);
 | 
						|
    }
 | 
						|
    return (-d $item); # should work everywhere now (July 2017)
 | 
						|
 | 
						|
#     Was:
 | 
						|
##### portable method to test if item is a directory
 | 
						|
##### (-d doesn't work in older versions of ActiveState Perl on Windows)
 | 
						|
 | 
						|
####if ($ON_WINDOWS) {
 | 
						|
####    my $mode = (stat $item)[2];
 | 
						|
####       $mode = 0 unless $mode;
 | 
						|
####    if ($mode & 0040000) { return 1; }
 | 
						|
####    else                 { return 0; }
 | 
						|
####} else {
 | 
						|
####    return (-d $item);  # works on Unix, Linux, CygWin, z/OS
 | 
						|
####}
 | 
						|
} # 1}}}
 | 
						|
sub is_excluded {                            # {{{1
 | 
						|
    my ($file       , # in
 | 
						|
        $excluded   , # in   hash of excluded directories
 | 
						|
       ) = @_;
 | 
						|
    my($filename, $filepath, $suffix) = fileparse($file);
 | 
						|
    foreach my $path (sort keys %{$excluded}) {
 | 
						|
        return 1 if ($filepath =~ m{^\Q$path\E/}i);
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub classify_file {                          # {{{1
 | 
						|
    my ($full_file   , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rh_ignored  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> classify_file($full_file)\n" if $opt_v > 2;
 | 
						|
    my $language = "(unknown)";
 | 
						|
 | 
						|
    if (basename($full_file) eq "-" && defined $opt_stdin_name) {
 | 
						|
       $full_file = $opt_stdin_name;
 | 
						|
    }
 | 
						|
 | 
						|
    my $look_at_first_line = 0;
 | 
						|
    my $file = basename $full_file;
 | 
						|
    if ($opt_autoconf and $file =~ /\.in$/) {
 | 
						|
       $file =~ s/\.in$//;
 | 
						|
    }
 | 
						|
    return $language if $Not_Code_Filename{$file}; # (unknown)
 | 
						|
    return $language if $file =~ m{~$}; # a temp edit file (unknown)
 | 
						|
    if (defined $Language_by_File{$file}) {
 | 
						|
        if      ($Language_by_File{$file} eq "Ant/XML") {
 | 
						|
            return Ant_or_XML(  $full_file, $rh_Err, $raa_errors);
 | 
						|
        } elsif ($Language_by_File{$file} eq "Maven/XML") {
 | 
						|
            return Maven_or_XML($full_file, $rh_Err, $raa_errors);
 | 
						|
        } else {
 | 
						|
            return $Language_by_File{$file};
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if ($file =~ /\.([^\.]+)$/) { # has an extension
 | 
						|
      print "$full_file extension=[$1]\n" if $opt_v > 2;
 | 
						|
      my $extension = $1;
 | 
						|
         # Windows file names are case insensitive so map
 | 
						|
         # all extensions to lowercase there.
 | 
						|
         $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | 
						|
      my @extension_list = ( $extension );
 | 
						|
      if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
 | 
						|
          my $extension = $1;
 | 
						|
          $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | 
						|
          unshift @extension_list, $extension;  # examine double ext first
 | 
						|
      }
 | 
						|
      if ($file =~ /\.([^\.]+\.[^\.]+\.[^\.]+)$/) { # has a triple extension
 | 
						|
          my $extension = $1;
 | 
						|
          $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | 
						|
          unshift @extension_list, $extension;  # examine triple ext first
 | 
						|
      }
 | 
						|
      foreach my $extension (@extension_list) {
 | 
						|
        if ($Not_Code_Extension{$extension} and
 | 
						|
           !$Forced_Extension{$extension}) {
 | 
						|
           # If .1 (for example) is an extension that would ordinarily be
 | 
						|
           # ignored but the user has insisted this be counted with the
 | 
						|
           # --force-lang option, then go ahead and count it.
 | 
						|
            $rh_ignored->{$full_file} =
 | 
						|
                'listed in $Not_Code_Extension{' . $extension . '}';
 | 
						|
            return $language;
 | 
						|
        }
 | 
						|
        # handle extension collisions
 | 
						|
        if (defined $Language_by_Extension{$extension}) {
 | 
						|
            if ($Language_by_Extension{$extension} eq
 | 
						|
                'MATLAB/Mathematica/Objective-C/MUMPS/Mercury') {
 | 
						|
                my $lang_M_or_O = "";
 | 
						|
                matlab_or_objective_C($full_file ,
 | 
						|
                                      $rh_Err    ,
 | 
						|
                                      $raa_errors,
 | 
						|
                                     \$lang_M_or_O);
 | 
						|
                if ($lang_M_or_O) {
 | 
						|
                    return $lang_M_or_O;
 | 
						|
                } else { # an error happened in matlab_or_objective_C()
 | 
						|
                    $rh_ignored->{$full_file} =
 | 
						|
                        'failure in matlab_or_objective_C($full_file)';
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal/Fortran') {
 | 
						|
                my $lang_F_or_P_or_P = "";
 | 
						|
                php_pascal_or_fortran($full_file ,
 | 
						|
                                      $rh_Err    ,
 | 
						|
                                      $raa_errors,
 | 
						|
                                     \$lang_F_or_P_or_P);
 | 
						|
                if ($lang_F_or_P_or_P) {
 | 
						|
                    return $lang_F_or_P_or_P;
 | 
						|
                } else { # an error happened in php_pascal_or_fortran()
 | 
						|
                    $rh_ignored->{$full_file} =
 | 
						|
                        'failure in php_pascal_or_fortran($full_file)';
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
 | 
						|
                my $lang_Pasc_or_Pup = "";
 | 
						|
                pascal_or_puppet(     $full_file ,
 | 
						|
                                      $rh_Err    ,
 | 
						|
                                      $raa_errors,
 | 
						|
                                     \$lang_Pasc_or_Pup);
 | 
						|
                if ($lang_Pasc_or_Pup) {
 | 
						|
                    return $lang_Pasc_or_Pup;
 | 
						|
                } else { # an error happened in pascal_or_puppet()
 | 
						|
                    $rh_ignored->{$full_file} =
 | 
						|
                        'failure in pascal_or_puppet()';
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
 | 
						|
                return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
 | 
						|
                return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
 | 
						|
                return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Raku/Prolog') {
 | 
						|
                return Raku_or_Prolog($full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq
 | 
						|
                     'IDL/Qt Project/Prolog/ProGuard') {
 | 
						|
                return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
 | 
						|
                # is it D or an init.d shell script?
 | 
						|
                my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
 | 
						|
                if ($a_script) {
 | 
						|
                    # could be dtrace, sh, bash or anything one would
 | 
						|
                    # write an init.d script in
 | 
						|
                    if (defined $Language_by_Script{$a_script}) {
 | 
						|
                        return $Language_by_Script{$a_script};
 | 
						|
                    } else {
 | 
						|
                        $rh_ignored->{$full_file} =
 | 
						|
                            "Unrecognized script language, '$a_script'";
 | 
						|
                    }
 | 
						|
                } else {
 | 
						|
                    return 'D';
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Fortran 77/Forth') {
 | 
						|
                return Forth_or_Fortran($full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'F#/Forth') {
 | 
						|
                return Forth_or_Fsharp( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Verilog-SystemVerilog/Coq') {
 | 
						|
                return Verilog_or_Coq( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Smarty') {
 | 
						|
                if ($extension ne "tpl") {
 | 
						|
                    # unambiguous -- if ends with .smarty, is Smarty
 | 
						|
                    return $Language_by_Extension{$extension};
 | 
						|
                }
 | 
						|
                # Smarty extension .tpl is generic; make sure the
 | 
						|
                # file at least roughly resembles PHP.  Alternatively,
 | 
						|
                # if the user forces the issue, do the count.
 | 
						|
                my $force_smarty = 0;
 | 
						|
                foreach (@opt_force_lang) {
 | 
						|
                    if (lc($_) eq "smarty,tpl") {
 | 
						|
                        $force_smarty = 1;
 | 
						|
                        last;
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                if (really_is_smarty($full_file) or $force_smarty) {
 | 
						|
                    return 'Smarty';
 | 
						|
                } else {
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'TypeScript/Qt Linguist') {
 | 
						|
                return TypeScript_or_QtLinguist( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Qt/Glade') {
 | 
						|
                return Qt_or_Glade( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'C#/Smalltalk') {
 | 
						|
                my $L = Csharp_or_Smalltalk( $full_file, $rh_Err, $raa_errors);
 | 
						|
                if ($L eq 'C#') {
 | 
						|
                    my $lines = first_line($full_file, 2, $rh_Err, $raa_errors);
 | 
						|
                    $lines =~ s/\n//mg;
 | 
						|
                    if ($lines =~ m[//\s+<auto-generated>]) {
 | 
						|
                        $L = "C# Generated";
 | 
						|
                        if ($opt_no_autogen) {
 | 
						|
                            $rh_ignored->{$full_file} =
 | 
						|
                                '--no-autogen ignores this auto-generated C# file';
 | 
						|
                            $L = "(unknown)"; # forces it to be ignored
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                return $L;
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Scheme/SaltStack') {
 | 
						|
                return Scheme_or_SaltStack( $full_file, $rh_Err, $raa_errors);
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Visual Basic/TeX/Apex Class') {
 | 
						|
                my $lang_VB_T_A = "";
 | 
						|
                Visual_Basic_or_TeX_or_Apex($full_file ,
 | 
						|
                                            $rh_Err    ,
 | 
						|
                                            $raa_errors,
 | 
						|
                                           \$lang_VB_T_A);
 | 
						|
                if ($lang_VB_T_A) {
 | 
						|
                    return $lang_VB_T_A;
 | 
						|
                } else { # an error happened in Visual_Basic_or_TeX_or_Apex
 | 
						|
                    $rh_ignored->{$full_file} =
 | 
						|
                        'failure in Visual_Basic_or_TeX_or_Apex()';
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } elsif ($Language_by_Extension{$extension} eq 'Brainfuck') {
 | 
						|
                if (really_is_bf($full_file)) {
 | 
						|
                    return $Language_by_Extension{$extension};
 | 
						|
                } else {
 | 
						|
                    return $language; # (unknown)
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                return $Language_by_Extension{$extension};
 | 
						|
            }
 | 
						|
        } else { # has an unmapped file extension
 | 
						|
            $look_at_first_line = 1;
 | 
						|
        }
 | 
						|
      }
 | 
						|
      # if all else fails look at the prefix instead of extension
 | 
						|
      ( my $stem = $file ) =~ s/^(.*?)\.\S+$/$1/;
 | 
						|
      if ($stem and defined($Language_by_Prefix{$stem})) {
 | 
						|
          return $Language_by_Prefix{$stem}
 | 
						|
      }
 | 
						|
    } elsif (defined $Language_by_File{lc $file}) {
 | 
						|
        return $Language_by_File{lc $file};
 | 
						|
    } elsif ($opt_lang_no_ext and
 | 
						|
             defined $Filters_by_Language{$opt_lang_no_ext}) {
 | 
						|
        return $opt_lang_no_ext;
 | 
						|
    } else {  # no file extension
 | 
						|
        $look_at_first_line = 1;
 | 
						|
    }
 | 
						|
 | 
						|
    if ($look_at_first_line) {
 | 
						|
        # maybe it is a shell/Perl/Python/Ruby/etc script that
 | 
						|
        # starts with pound bang:
 | 
						|
        #   #!/usr/bin/perl
 | 
						|
        #   #!/usr/bin/env perl
 | 
						|
        my ($script_language, $L) = peek_at_first_line($full_file ,
 | 
						|
                                                       $rh_Err    ,
 | 
						|
                                                       $raa_errors);
 | 
						|
        if (!$script_language) {
 | 
						|
            $rh_ignored->{$full_file} = "language unknown (#2)";
 | 
						|
            # returns (unknown)
 | 
						|
        }
 | 
						|
        if (defined $Language_by_Script{$script_language}) {
 | 
						|
            if (defined $Filters_by_Language{
 | 
						|
                            $Language_by_Script{$script_language}}) {
 | 
						|
                $language = $Language_by_Script{$script_language};
 | 
						|
            } else {
 | 
						|
                $rh_ignored->{$full_file} =
 | 
						|
                    "undefined:  Filters_by_Language{" .
 | 
						|
                    $Language_by_Script{$script_language} .
 | 
						|
                    "} for scripting language $script_language";
 | 
						|
                # returns (unknown)
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            # #456:  XML files can have a variety of domain-specific file
 | 
						|
            #        extensions.  If the extension is unrecognized, examine
 | 
						|
            #        the first line of the file to see if it is XML
 | 
						|
            if ($L =~ /<\?xml\s/) {
 | 
						|
                $language = "XML";
 | 
						|
                delete $rh_ignored->{$full_file};
 | 
						|
            } else {
 | 
						|
                $rh_ignored->{$full_file} = "language unknown (#3)";
 | 
						|
            }
 | 
						|
            # returns (unknown)
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- classify_file($full_file)=$language\n" if $opt_v > 2;
 | 
						|
    return $language;
 | 
						|
} # 1}}}
 | 
						|
sub first_line {                             # {{{1
 | 
						|
    # return the first $n_lines of text in the file as one string
 | 
						|
    my ($file        , # in
 | 
						|
        $n_lines     , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
    my $line = "";
 | 
						|
    print "-> first_line($file, $n_lines)\n" if $opt_v > 2;
 | 
						|
    if (!can_read($file)) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $line;
 | 
						|
    }
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        print "<- first_line($file, $n_lines)\n" if $opt_v > 2;
 | 
						|
        return $line;
 | 
						|
    }
 | 
						|
    # issue 644: Unicode files can have non-zero $n_lines
 | 
						|
    # but empty <$IN> contents
 | 
						|
    for (my $i = 0; $i < $n_lines; $i++) {
 | 
						|
        my $L = <$IN>;
 | 
						|
        last unless defined $L;
 | 
						|
        chomp($line .= $L);
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    print "<- first_line($file, $n_lines, '$line')\n" if $opt_v > 2;
 | 
						|
    return $line;
 | 
						|
} # 1}}}
 | 
						|
sub peek_at_first_line {                     # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> peek_at_first_line($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $script_language = "";
 | 
						|
    my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
 | 
						|
 | 
						|
    if (defined $first_line) {
 | 
						|
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
 | 
						|
        if ($first_line =~ /^#\!\s*(\S.*?)$/) {
 | 
						|
#print "peek_at_first_line 1=[$1]\n";
 | 
						|
            my @pound_bang = split(' ', $1);
 | 
						|
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
 | 
						|
            if (basename($pound_bang[0]) eq "env" and
 | 
						|
                scalar @pound_bang > 1) {
 | 
						|
                $script_language = $pound_bang[1];
 | 
						|
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
 | 
						|
            } else {
 | 
						|
                $script_language = basename $pound_bang[0];
 | 
						|
#print "peek_at_first_line pound_bang B $script_language\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- peek_at_first_line($file)\n" if $opt_v > 2;
 | 
						|
    return ($script_language, $first_line);
 | 
						|
} # 1}}}
 | 
						|
sub different_files {                        # {{{1
 | 
						|
    # See which of the given files are unique by computing each file's MD5
 | 
						|
    # sum.  Return the subset of files which are unique.
 | 
						|
    my ($ra_files    , # in
 | 
						|
        $rh_Err      , # in
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rh_ignored  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
 | 
						|
    my %file_hash = ();  # file_hash{md5 hash} = [ file1, file2, ... ]
 | 
						|
    foreach my $F (@{$ra_files}) {
 | 
						|
        next if is_dir($F);  # needed for Windows
 | 
						|
        my $IN = open_file('<', $F, 1);
 | 
						|
        if (!defined $IN) {
 | 
						|
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
 | 
						|
            $rh_ignored->{$F} = 'cannot read';
 | 
						|
        } else {
 | 
						|
            if ($HAVE_Digest_MD5) {
 | 
						|
                binmode $IN;
 | 
						|
                my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
 | 
						|
#print "$F, $MD5\n";
 | 
						|
                push @{$file_hash{$MD5}}, $F;
 | 
						|
            } else {
 | 
						|
                # all files treated unique
 | 
						|
                push @{$file_hash{$F}}, $F;
 | 
						|
            }
 | 
						|
            $IN->close;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Loop over file sets having identical MD5 sums.  Within
 | 
						|
    # each set, pick the file that most resembles known source
 | 
						|
    # code.
 | 
						|
    my @unique = ();
 | 
						|
    for my $md5 (sort keys %file_hash) {
 | 
						|
        my $i_best = 0;
 | 
						|
        for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
 | 
						|
            my $F = $file_hash{$md5}[$i];
 | 
						|
            my (@nul_a, %nul_h);
 | 
						|
            my $language = classify_file($F, $rh_Err,
 | 
						|
                                        # don't save these errors; pointless
 | 
						|
                                        \@nul_a, \%nul_h);
 | 
						|
            $i_best = $i if $language ne "(unknown)";
 | 
						|
        }
 | 
						|
        # keep the best one found and identify the rest as ignored
 | 
						|
        for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
 | 
						|
            if ($i == $i_best) {
 | 
						|
                push @unique, $file_hash{$md5}[$i_best];
 | 
						|
            } else {
 | 
						|
                $rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
 | 
						|
                    $file_hash{$md5}[$i_best];
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
    }
 | 
						|
    print "<- different_files(@unique)\n" if $opt_v > 2;
 | 
						|
    return @unique;
 | 
						|
} # 1}}}
 | 
						|
sub call_counter {                           # {{{1
 | 
						|
    my ($file     , # in
 | 
						|
        $language , # in
 | 
						|
        $ra_Errors, # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    # Logic:  pass the file through the following filters:
 | 
						|
    #         1. remove leading lines (if --skip-leading)
 | 
						|
    #         2. remove blank lines
 | 
						|
    #         3. remove comments using each filter defined for this language
 | 
						|
    #            (example:  SQL has two, remove_starts_with(--) and
 | 
						|
    #             remove_c_comments() )
 | 
						|
    #         4. compute comment lines as
 | 
						|
    #               total lines - blank lines - lines left over after all
 | 
						|
    #                   comment filters have been applied
 | 
						|
 | 
						|
    print "-> call_counter($file, $language)\n" if $opt_v > 2;
 | 
						|
#print "call_counter:  ", Dumper(@routines), "\n";
 | 
						|
 | 
						|
    my @lines = ();
 | 
						|
    my $ascii = "";
 | 
						|
    if (is_binary($file) and $opt_unicode) {
 | 
						|
        # was binary so must be unicode
 | 
						|
 | 
						|
        $/ = undef;
 | 
						|
        my $IN = open_file('<', $file, 1);
 | 
						|
        my $bin_text = <$IN>;
 | 
						|
        $IN->close;
 | 
						|
        $/ = "\n";
 | 
						|
 | 
						|
        $ascii = unicode_to_ascii( $bin_text );
 | 
						|
        @lines = split("\n", $ascii );
 | 
						|
        foreach (@lines) { $_ = "$_\n"; }
 | 
						|
 | 
						|
    } else {
 | 
						|
        # regular text file
 | 
						|
        @lines = read_file($file);
 | 
						|
        $ascii = join('', @lines);
 | 
						|
    }
 | 
						|
 | 
						|
    # implement --perl-ignore-data here
 | 
						|
 | 
						|
    if ($opt_skip_leading) {
 | 
						|
        my $strip = 1;
 | 
						|
        my ($N, @exts) = split(/,/, $opt_skip_leading);
 | 
						|
        if (@exts) {
 | 
						|
            # only apply if this file's extension is listed
 | 
						|
            my $this_file_ext = file_extension($file);
 | 
						|
            $strip = grep(/^${this_file_ext}$/, @exts);
 | 
						|
        }
 | 
						|
        @lines = remove_first_n($N, \@lines) if $strip;
 | 
						|
    }
 | 
						|
 | 
						|
    my @original_lines = @lines;
 | 
						|
    my $total_lines    = scalar @lines;
 | 
						|
 | 
						|
    print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
 | 
						|
    @lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
 | 
						|
    my $blank_lines = $total_lines - scalar @lines;
 | 
						|
    print "   call_counter: total_lines=$total_lines  blank_lines=",
 | 
						|
        $blank_lines, "\n" if $opt_v > 2;
 | 
						|
    print_lines($file, "Blank lines removed:", \@lines)
 | 
						|
        if $opt_print_filter_stages;
 | 
						|
 | 
						|
    @lines = rm_comments(\@lines, $language, $file,
 | 
						|
                               \%EOL_Continuation_re, $ra_Errors);
 | 
						|
 | 
						|
    my $comment_lines = $total_lines - $blank_lines - scalar  @lines;
 | 
						|
    if ($opt_strip_comments) {
 | 
						|
        my $stripped_file = "";
 | 
						|
        if ($opt_original_dir) {
 | 
						|
            $stripped_file =          $file . ".$opt_strip_comments";
 | 
						|
        } else {
 | 
						|
            $stripped_file = basename $file . ".$opt_strip_comments";
 | 
						|
        }
 | 
						|
        write_file($stripped_file, {}, @lines);
 | 
						|
    }
 | 
						|
    if ($opt_strip_code) {
 | 
						|
        my $stripped_file = "";
 | 
						|
        if ($opt_original_dir) {
 | 
						|
            $stripped_file =          $file . ".$opt_strip_code";
 | 
						|
        } else {
 | 
						|
            $stripped_file = basename $file . ".$opt_strip_code";
 | 
						|
        }
 | 
						|
        write_file($stripped_file, {}, rm_code(\@original_lines, \@lines));
 | 
						|
    }
 | 
						|
    if ($opt_html and !$opt_diff) {
 | 
						|
        chomp(@original_lines);  # includes blank lines, comments
 | 
						|
        chomp(@lines);           # no blank lines, no comments
 | 
						|
 | 
						|
        my (@diff_L, @diff_R, %count);
 | 
						|
 | 
						|
        # remove blank lines to get better quality diffs; count
 | 
						|
        # blank lines separately
 | 
						|
        my @original_lines_minus_white = ();
 | 
						|
        # however must keep track of how many blank lines were removed and
 | 
						|
        # where they were removed so that the HTML display can include it
 | 
						|
        my %blank_line  = ();
 | 
						|
        my $insert_line = 0;
 | 
						|
        foreach (@original_lines) {
 | 
						|
            if (/^\s*$/) {
 | 
						|
               ++$count{blank}{same};
 | 
						|
               ++$blank_line{ $insert_line };
 | 
						|
            } else {
 | 
						|
                ++$insert_line;
 | 
						|
                push @original_lines_minus_white, $_;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        array_diff( $file                       ,   # in
 | 
						|
                   \@original_lines_minus_white ,   # in
 | 
						|
                   \@lines                      ,   # in
 | 
						|
                   "comment"                    ,   # in
 | 
						|
                   \@diff_L, \@diff_R,          ,   # out
 | 
						|
                    $ra_Errors);                    # in/out
 | 
						|
        write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
 | 
						|
#print Dumper("count", \%count);
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
    return ($total_lines, $blank_lines, $comment_lines);
 | 
						|
} # 1}}}
 | 
						|
sub windows_glob {                           # {{{1
 | 
						|
    # Windows doesn't expand wildcards.  Use code from Sean M. Burke's
 | 
						|
    # Win32::Autoglob module to do this.
 | 
						|
    return map {;
 | 
						|
        ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
 | 
						|
          } @_;
 | 
						|
} # 1}}}
 | 
						|
sub write_file {                             # {{{1
 | 
						|
    my ($file       , # in
 | 
						|
        $rh_options , # in
 | 
						|
        @lines      , # in
 | 
						|
       ) = @_;
 | 
						|
    # If $file is a conventional scalar, it is the name of the file to write to.
 | 
						|
    # if $file is a reference to a scalar, rather than writing @lines to a file,
 | 
						|
    # write @lines to this scalar as a single string.
 | 
						|
 | 
						|
    my $local_formatting = 0;
 | 
						|
    foreach my $opt (sort keys %{$rh_options}) {
 | 
						|
#       print "write_file option $opt = $rh_options->{$opt}\n";
 | 
						|
        $local_formatting = 1;
 | 
						|
    }
 | 
						|
    my $write_to_file = ref($file) eq "" ? 1 : 0;
 | 
						|
 | 
						|
#print "write_file 1 [$file]\n";
 | 
						|
    # Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
 | 
						|
    if ($write_to_file) {
 | 
						|
        my $preglob_filename = $file;
 | 
						|
    #print "write_file 2 [$preglob_filename]\n";
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            $file = (windows_glob($file))[0];
 | 
						|
        } else {
 | 
						|
            $file = File::Glob::bsd_glob($file);
 | 
						|
        }
 | 
						|
    #print "write_file 3 [$file]\n";
 | 
						|
        $file = $preglob_filename unless $file;
 | 
						|
    #print "write_file 4 [$file]\n";
 | 
						|
    }
 | 
						|
 | 
						|
    if ($write_to_file) {
 | 
						|
        print "-> write_file($file)\n" if $opt_v > 2;
 | 
						|
    } else {
 | 
						|
        print "-> write_file() -- writing to string variable\n" if $opt_v > 2;
 | 
						|
    }
 | 
						|
 | 
						|
    my @prt_lines = ();
 | 
						|
 | 
						|
    my $n_col = undef;
 | 
						|
    if ($local_formatting) {
 | 
						|
        $n_col = scalar @{$rh_options->{'columns'}};
 | 
						|
        if ($opt_xml) {
 | 
						|
            push @prt_lines, '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
 | 
						|
            push @prt_lines, "<all_$rh_options->{'file_type'}>\n";
 | 
						|
        } elsif ($opt_yaml) {
 | 
						|
            push @prt_lines, "---\n";
 | 
						|
        } elsif ($opt_md) {
 | 
						|
            push @prt_lines, join("|", @{$rh_options->{'columns'}})  . "\n";
 | 
						|
            push @prt_lines, join("|", map( ":------", 1 .. $n_col)) . "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    chomp(@lines);
 | 
						|
 | 
						|
    if ($local_formatting) {
 | 
						|
        my @json_lines = ();
 | 
						|
        foreach my $L (@lines) {
 | 
						|
            my @entries;
 | 
						|
            if ($rh_options->{'separator'}) {
 | 
						|
                @entries = split($rh_options->{'separator'}, $L, $n_col);
 | 
						|
            } else {
 | 
						|
                @entries = ( $L );
 | 
						|
            }
 | 
						|
            if ($opt_xml) {
 | 
						|
                push @prt_lines, "  <$rh_options->{'file_type'} ";
 | 
						|
                for (my $i = 0; $i < $n_col; $i++) {
 | 
						|
                    push @prt_lines, sprintf("%s=\"%s\" ", $rh_options->{'columns'}[$i], $entries[$i]);
 | 
						|
                }
 | 
						|
                push @prt_lines, "/>\n";
 | 
						|
            } elsif ($opt_yaml or $opt_json) {
 | 
						|
                my @pairs = ();
 | 
						|
                for (my $i = 0; $i < $n_col; $i++) {
 | 
						|
                    push @pairs,
 | 
						|
                        sprintf "\"%s\":\"%s\"", $rh_options->{'columns'}[$i], $entries[$i];
 | 
						|
                }
 | 
						|
                if ($opt_json) {
 | 
						|
                    # JSON can't literal '\x' in filenames, #575
 | 
						|
                    $pairs[0] =~ s/\\x//g;
 | 
						|
                    push @json_lines, join(", ", @pairs );
 | 
						|
                } else {
 | 
						|
                    push @prt_lines, "- {", join(", ", @pairs) . "}\n";
 | 
						|
                }
 | 
						|
            } elsif ($opt_csv) {
 | 
						|
                push @prt_lines, join(",", @entries) . "\n";
 | 
						|
            } elsif ($opt_md) {
 | 
						|
                push @prt_lines, join("|", @entries) . "\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if ($opt_json) {
 | 
						|
            push @prt_lines, "[{" . join("},\n {", @json_lines) . "}]\n";
 | 
						|
        }
 | 
						|
        if (!$opt_json and !$opt_yaml and !$opt_xml and !$opt_csv) {
 | 
						|
            push @prt_lines, join("\n", @lines) . "\n";
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        push @prt_lines, join("\n", @lines) . "\n";
 | 
						|
    }
 | 
						|
 | 
						|
    if ($local_formatting and $opt_xml) {
 | 
						|
        push @prt_lines, "</all_$rh_options->{'file_type'}>\n";
 | 
						|
    }
 | 
						|
 | 
						|
    # Create the destination directory if it doesn't already exist.
 | 
						|
    my $abs_file_path = File::Spec->rel2abs( $file );
 | 
						|
    my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
 | 
						|
    mkpath($volume . $directories, 1, 0777);
 | 
						|
 | 
						|
    my $OUT = undef;
 | 
						|
    unlink_file($file);
 | 
						|
    if ($opt_file_encoding) {
 | 
						|
        $OUT = open_file(">:encoding($opt_file_encoding)", $file, 0);
 | 
						|
    } else {
 | 
						|
        $OUT = open_file('>', $file, 1);
 | 
						|
    }
 | 
						|
    if (!defined $OUT) {
 | 
						|
        warn "Unable to write to $file\n";
 | 
						|
        print "<- write_file\n" if $opt_v > 2;
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    print $OUT @prt_lines;
 | 
						|
    $OUT->close;
 | 
						|
 | 
						|
    if (can_read($file)) {
 | 
						|
        print "Wrote $file" unless $opt_quiet or $opt_fmt > 0;
 | 
						|
        print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | 
						|
        print "\n" unless $opt_quiet;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- write_file\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub file_pairs_from_file {                   # {{{1
 | 
						|
    my ($file             , # in
 | 
						|
        $ra_added         , # out
 | 
						|
        $ra_removed       , # out
 | 
						|
        $ra_compare_list  , # out
 | 
						|
       ) = @_;
 | 
						|
    #
 | 
						|
    # Example valid input format for $file
 | 
						|
    # 1)
 | 
						|
    #   A/d1/hello.f90 | B/d1/hello.f90
 | 
						|
    #   A/hello.C | B/hello.C
 | 
						|
    #   A/d2/hi.py | B/d2/hi.py
 | 
						|
    #
 | 
						|
    # 2)
 | 
						|
    # Files added: 1
 | 
						|
    #   + B/extra_file.pl ; Perl
 | 
						|
    #
 | 
						|
    # Files removed: 1
 | 
						|
    #   - A/d2/hello.java ; Java
 | 
						|
    #
 | 
						|
    # File pairs compared: 3
 | 
						|
    #   != A/d1/hello.f90 | B/d1/hello.f90 ; Fortran 90
 | 
						|
    #   != A/hello.C | B/hello.C ; C++
 | 
						|
    #   == A/d2/hi.py | B/d2/hi.py ; Python
 | 
						|
 | 
						|
    print "-> file_pairs_from_file($file)\n" if $opt_v and $opt_v > 2;
 | 
						|
    @{$ra_compare_list} = ();
 | 
						|
    my @lines = read_file($file);
 | 
						|
    my $mode = "compare";
 | 
						|
    foreach my $L (@lines) {
 | 
						|
        next if $L =~ /^\s*$/ or $L =~ /^\s*#/;
 | 
						|
        chomp($L);
 | 
						|
        if      ($L =~ /^Files\s+(added|removed):/) {
 | 
						|
            $mode = $1;
 | 
						|
        } elsif ($L =~ /^File\s+pairs\s+compared:/) {
 | 
						|
            $mode = "compare";
 | 
						|
        } elsif ($mode eq "added" or $mode eq "removed") {
 | 
						|
            $L =~ m/^\s*[+-]\s+(.*?)\s+;/;
 | 
						|
            my $F = $1;
 | 
						|
            if (!defined $1) {
 | 
						|
                warn "file_pairs_from_file($file) parse failure\n",
 | 
						|
                     "in $mode mode for '$L', ignoring\n";
 | 
						|
                next;
 | 
						|
            }
 | 
						|
            if ($mode eq "added") {
 | 
						|
                push @{$ra_added}  , $F;
 | 
						|
            } else {
 | 
						|
                push @{$ra_removed}, $F;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $L =~ m/^\s*([!=]=\s*)?(.*?)\s*\|\s*(.*?)\s*(;.*?)?$/;
 | 
						|
            if (!defined $2 or !defined $3) {
 | 
						|
                warn "file_pairs_from_file($file) parse failure\n",
 | 
						|
                     "in compare mode for '$L', ignoring\n";
 | 
						|
                next;
 | 
						|
            }
 | 
						|
            push @{$ra_compare_list}, ( [$2, $3] );
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- file_pairs_from_file\n" if $opt_v and $opt_v > 2;
 | 
						|
}
 | 
						|
sub read_file  {                             # {{{1
 | 
						|
    my ($file, ) = @_;
 | 
						|
    print "-> read_file($file)\n" if $opt_v and $opt_v > 2;
 | 
						|
    my %BoM = (
 | 
						|
        "fe ff"           => 2 ,
 | 
						|
        "ff fe"           => 2 ,
 | 
						|
        "ef bb bf"        => 3 ,
 | 
						|
        "f7 64 4c"        => 3 ,
 | 
						|
        "0e fe ff"        => 3 ,
 | 
						|
        "fb ee 28"        => 3 ,
 | 
						|
        "00 00 fe ff"     => 4 ,
 | 
						|
        "ff fe 00 00"     => 4 ,
 | 
						|
        "2b 2f 76 38"     => 4 ,
 | 
						|
        "2b 2f 76 39"     => 4 ,
 | 
						|
        "2b 2f 76 2b"     => 4 ,
 | 
						|
        "2b 2f 76 2f"     => 4 ,
 | 
						|
        "dd 73 66 73"     => 4 ,
 | 
						|
        "84 31 95 33"     => 4 ,
 | 
						|
        "2b 2f 76 38 2d"  => 5 ,
 | 
						|
        );
 | 
						|
 | 
						|
    my @lines = ();
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (defined $IN) {
 | 
						|
        @lines = <$IN>;
 | 
						|
        $IN->close;
 | 
						|
        if ($lines[$#lines]) {  # test necessary for zero content files
 | 
						|
                                # (superfluous?)
 | 
						|
            # Some files don't end with a new line.  Force this:
 | 
						|
            $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        warn "Unable to read $file\n";
 | 
						|
    }
 | 
						|
 | 
						|
    # Are first few characters of the file Unicode Byte Order
 | 
						|
    # Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)?
 | 
						|
    # If yes, remove them.
 | 
						|
    if (@lines) {
 | 
						|
        my @chrs   = split('', $lines[0]);
 | 
						|
        my $n_chrs = scalar @chrs;
 | 
						|
        my ($n2, $n3, $n4, $n5) = ('', '', '', '');
 | 
						|
        $n2 = sprintf("%x %x", map  ord, @chrs[0,1]) if $n_chrs >= 2;
 | 
						|
        $n3 = sprintf("%s %x", $n2, ord  $chrs[2])   if $n_chrs >= 3;
 | 
						|
        $n4 = sprintf("%s %x", $n3, ord  $chrs[3])   if $n_chrs >= 4;
 | 
						|
        $n5 = sprintf("%s %x", $n4, ord  $chrs[4])   if $n_chrs >= 5;
 | 
						|
        if      (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
 | 
						|
        } elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
 | 
						|
        } elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
 | 
						|
        } elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Trim DOS line endings.  This allows Windows files
 | 
						|
    # to be diff'ed with Unix files without line endings
 | 
						|
    # causing every line to differ.
 | 
						|
    foreach (@lines) { s/\cM$// }
 | 
						|
 | 
						|
    print "<- read_file\n" if $opt_v and $opt_v > 2;
 | 
						|
    return @lines;
 | 
						|
} # 1}}}
 | 
						|
sub rm_blanks {                              # {{{1
 | 
						|
    my ($ra_in    ,
 | 
						|
        $language ,
 | 
						|
        $rh_EOL_continuation_re) = @_;
 | 
						|
    print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
 | 
						|
#print "rm_blanks: language = [$language]\n";
 | 
						|
    my @out = ();
 | 
						|
    if ($language eq "COBOL") {
 | 
						|
        @out = remove_cobol_blanks($ra_in);
 | 
						|
    } else {
 | 
						|
        # removes blank lines
 | 
						|
        if (defined $rh_EOL_continuation_re->{$language}) {
 | 
						|
            @out = remove_matches_2re($ra_in, blank_regex($language),
 | 
						|
                                      $rh_EOL_continuation_re->{$language});
 | 
						|
        } else {
 | 
						|
            @out = remove_matches($ra_in, blank_regex($language));
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- rm_blanks(language=$language, n_remain= ",
 | 
						|
        scalar(@out), "\n" if $opt_v > 2;
 | 
						|
    return @out;
 | 
						|
} # 1}}}
 | 
						|
sub blank_regex {                            # {{{1
 | 
						|
    my ($language) = @_;
 | 
						|
 | 
						|
    print "-> blank_regex(language=$language)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $blank_regex = '^\s*$';
 | 
						|
    if ($language eq "X++") {
 | 
						|
        $blank_regex = '^\s*#?\s*$';
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- blank_regex(language=$language) = \"", $blank_regex, "\"\n" if $opt_v > 2;
 | 
						|
    return $blank_regex;
 | 
						|
} # 1}}}
 | 
						|
sub rm_comments {                            # {{{1
 | 
						|
    my ($ra_lines , # in, must be free of blank lines
 | 
						|
        $language , # in
 | 
						|
        $file     , # in (some language counters, eg Haskell, need
 | 
						|
                    #     access to the original file)
 | 
						|
        $rh_EOL_continuation_re , # in
 | 
						|
        $raa_Errors , # out
 | 
						|
       ) = @_;
 | 
						|
    print "-> rm_comments(file=$file)\n" if $opt_v > 2;
 | 
						|
    my @routines       = @{$Filters_by_Language{$language}};
 | 
						|
    my @lines          = @{$ra_lines};
 | 
						|
    my @original_lines = @{$ra_lines};
 | 
						|
 | 
						|
    if (!scalar @original_lines) {
 | 
						|
        return @lines;
 | 
						|
    }
 | 
						|
 | 
						|
    foreach my $call_string (@routines) {
 | 
						|
        my $subroutine = $call_string->[0];
 | 
						|
        next if $subroutine eq "rm_comments_in_strings" and !$opt_strip_str_comments;
 | 
						|
        if (! defined &{$subroutine}) {
 | 
						|
            warn "rm_comments undefined subroutine $subroutine for $file\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
 | 
						|
        my @args  = @{$call_string};
 | 
						|
        shift @args; # drop the subroutine name
 | 
						|
        if (@args and $args[0] eq '>filename<') {
 | 
						|
            shift   @args;
 | 
						|
            unshift @args, $file;
 | 
						|
        }
 | 
						|
 | 
						|
        # Unusual inputs, namely /* within strings without
 | 
						|
        # a corresponding */ can cause huge delays so put a timer on this.
 | 
						|
        my $max_duration_sec = scalar(@lines)/1000.0; # est lines per second
 | 
						|
           $max_duration_sec = 1.0 if $max_duration_sec < 1;
 | 
						|
        if (defined $opt_timeout) {
 | 
						|
            $max_duration_sec = $opt_timeout if $opt_timeout > 0;
 | 
						|
        }
 | 
						|
#my $T_start = Time::HiRes::time();
 | 
						|
        eval {
 | 
						|
            local $SIG{ALRM} = sub { die "alarm\n" };
 | 
						|
            alarm $max_duration_sec;
 | 
						|
            no strict 'refs';
 | 
						|
            @lines = &{$subroutine}(\@lines, @args);   # apply filter...
 | 
						|
            alarm 0;
 | 
						|
        };
 | 
						|
        if ($@) {
 | 
						|
            # timed out
 | 
						|
            die unless $@ eq "alarm\n";
 | 
						|
            push @{$raa_Errors},
 | 
						|
                [ $Error_Codes{'Line count, exceeded timeout'}, $file ];
 | 
						|
            @lines = ();
 | 
						|
            if ($opt_v) {
 | 
						|
                warn "rm_comments($subroutine): exceeded timeout for $file--ignoring\n";
 | 
						|
            }
 | 
						|
            next;
 | 
						|
        }
 | 
						|
#print "end time = ",Time::HiRes::time() - $T_start;
 | 
						|
 | 
						|
        print "   rm_comments after $subroutine line count=",
 | 
						|
            scalar(@lines), "\n" if $opt_v > 2;
 | 
						|
 | 
						|
#print "lines after=\n";
 | 
						|
#print Dumper(\@lines);
 | 
						|
 | 
						|
        print_lines($file, "After $subroutine(@args)", \@lines)
 | 
						|
            if $opt_print_filter_stages;
 | 
						|
        # then remove blank lines which are created by comment removal
 | 
						|
        if (defined $rh_EOL_continuation_re->{$language}) {
 | 
						|
            @lines = remove_matches_2re(\@lines, '^\s*$',
 | 
						|
                                        $rh_EOL_continuation_re->{$language});
 | 
						|
        } else {
 | 
						|
            @lines = remove_matches(\@lines, '^\s*$');
 | 
						|
        }
 | 
						|
 | 
						|
        print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines)
 | 
						|
            if $opt_print_filter_stages;
 | 
						|
    }
 | 
						|
 | 
						|
    foreach (@lines) { chomp }   # make sure no spurious newlines were added
 | 
						|
 | 
						|
    # Exception for scripting languages:  treat the first #! line as code.
 | 
						|
    # Will need to add it back in if it was removed earlier.
 | 
						|
    chomp( $original_lines[0] );
 | 
						|
    if (defined $Script_Language{$language} and
 | 
						|
        $original_lines[0] =~ /^#!/ and
 | 
						|
        (!scalar(@lines) or ($lines[0] ne $original_lines[0]))) {
 | 
						|
        unshift @lines, $original_lines[0];  # add the first line back
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- rm_comments\n" if $opt_v > 2;
 | 
						|
    return @lines;
 | 
						|
} # 1}}}
 | 
						|
sub rm_code {                                # {{{1
 | 
						|
    # Return lines containing only comments.
 | 
						|
    my ($ra_lines_w_comments , # in
 | 
						|
        $ra_lines_no_comments, # in
 | 
						|
       )  = @_;
 | 
						|
 | 
						|
    my @w_comments_no_blanks  = grep { ! /^\s*$/ } @{$ra_lines_w_comments} ;
 | 
						|
    my @no_comments_no_blanks = grep { ! /^\s*$/ } @{$ra_lines_no_comments};
 | 
						|
    chomp( @w_comments_no_blanks  );
 | 
						|
    chomp( @no_comments_no_blanks );
 | 
						|
 | 
						|
    my @sdiffs = sdiff( \@w_comments_no_blanks, \@no_comments_no_blanks, );
 | 
						|
    my @comments = ();
 | 
						|
    foreach my $entry (@sdiffs) {
 | 
						|
        my ($out_1, $out_2) = ('', '');
 | 
						|
        next if $entry->[0] eq 'u';
 | 
						|
        if ($entry->[0] eq '-') {
 | 
						|
            $out_1 = $entry->[1];
 | 
						|
        }
 | 
						|
        next if $out_1 =~ /^\s*$/;
 | 
						|
        push @comments, $out_1;
 | 
						|
    }
 | 
						|
    return @comments;
 | 
						|
} # 1}}}
 | 
						|
sub remove_first_n {                         # {{{1
 | 
						|
    my ($n, $ra_lines, ) = @_;
 | 
						|
    print "-> remove_first_n\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    if (scalar @{$ra_lines} > $n) {
 | 
						|
        for (my $i = $n; $i < scalar @{$ra_lines}; $i++) {
 | 
						|
            push @save_lines, $ra_lines->[$i];
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_first_n\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_f77_comments {                    # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    print "-> remove_f77_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        next if m{^[*cC]};
 | 
						|
        next if m{^\s*!};
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_f77_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_f90_comments {                    # {{{1
 | 
						|
    # derived from SLOCCount
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    print "-> remove_f90_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        # a comment is              m/^\s*!/
 | 
						|
        # an empty line is          m/^\s*$/
 | 
						|
        # a HPF statement is        m/^\s*!hpf\$/i
 | 
						|
        # an Open MP statement is   m/^\s*!omp\$/i
 | 
						|
        if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
 | 
						|
            push @save_lines, $_;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_f90_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub reduce_to_rmd_code_blocks {              #{{{1
 | 
						|
    my ($ra_lines) = @_; #in
 | 
						|
    print "-> reduce_to_rmd_code_blocks()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_code_block = 0;
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if ( m/^```\{\s*[[:alpha:]]/ ) {
 | 
						|
            $in_code_block = 1;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ( m/^```\s*$/ ) {
 | 
						|
            $in_code_block = 0;
 | 
						|
        }
 | 
						|
        next if (!$in_code_block);
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- reduce_to_rmd_code_blocks()\n" if $opt_v> 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_matches {                         # {{{1
 | 
						|
    my ($ra_lines, # in
 | 
						|
        $pattern , # in   Perl regular expression (case insensitive)
 | 
						|
       ) = @_;
 | 
						|
    print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
#chomp;
 | 
						|
#print "remove_matches [$pattern] [$_]\n";
 | 
						|
        next if m{$pattern}i;
 | 
						|
#       s{$pattern}{}i;
 | 
						|
#       next unless /\S/; # at least one non space
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    print "<- remove_matches\n" if $opt_v > 2;
 | 
						|
#print "remove_matches returning\n   ", join("\n   ", @save_lines), "\n";
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_matches_2re {                     # {{{1
 | 
						|
    my ($ra_lines, # in
 | 
						|
        $pattern1, # in Perl regex 1 (case insensitive) to match
 | 
						|
        $pattern2, # in Perl regex 2 (case insensitive) to not match prev line
 | 
						|
       ) = @_;
 | 
						|
    print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
 | 
						|
#       chomp($ra_lines->[$i]);
 | 
						|
#print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
 | 
						|
        if ($i) {
 | 
						|
#print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
 | 
						|
            next if ($ra_lines->[$i]   =~ m{$pattern1}i) and
 | 
						|
                    ($ra_lines->[$i-1] !~ m{$pattern2}i);
 | 
						|
        } else {
 | 
						|
            # on first line
 | 
						|
            next if $ra_lines->[$i]   =~  m{$pattern1}i;
 | 
						|
        }
 | 
						|
        push @save_lines, $ra_lines->[$i];
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    print "<- remove_matches_2re\n" if $opt_v > 2;
 | 
						|
#print "remove_matches_2re returning\n   ", join("\n   ", @save_lines), "\n";
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_inline {                          # {{{1
 | 
						|
    my ($ra_lines, # in
 | 
						|
        $pattern , # in   Perl regular expression (case insensitive)
 | 
						|
       ) = @_;
 | 
						|
    print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    unless ($opt_inline) {
 | 
						|
        return @{$ra_lines};
 | 
						|
    }
 | 
						|
    my $nLines_affected = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
#chomp; print "remove_inline [$pattern] [$_]\n";
 | 
						|
        if (m{$pattern}i) {
 | 
						|
            ++$nLines_affected;
 | 
						|
            s{$pattern}{}i;
 | 
						|
        }
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_inline\n" if $opt_v > 2;
 | 
						|
#print "remove_inline returning\n   ", join("\n   ", @save_lines), "\n";
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_above {                           # {{{1
 | 
						|
    my ($ra_lines, $marker, ) = @_;
 | 
						|
    print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    # Make two passes through the code:
 | 
						|
    # 1. check if the marker exists
 | 
						|
    # 2. remove anything above the marker if it exists,
 | 
						|
    #    do nothing if the marker does not exist
 | 
						|
 | 
						|
    # Pass 1
 | 
						|
    my $found_marker = 0;
 | 
						|
    for (my $line_number  = 1;
 | 
						|
            $line_number <= scalar @{$ra_lines};
 | 
						|
            $line_number++) {
 | 
						|
        if ($ra_lines->[$line_number-1] =~ m{$marker}) {
 | 
						|
            $found_marker = $line_number;
 | 
						|
            last;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # Pass 2 only if needed
 | 
						|
    my @save_lines = ();
 | 
						|
    if ($found_marker) {
 | 
						|
        my $n = 1;
 | 
						|
        foreach (@{$ra_lines}) {
 | 
						|
            push @save_lines, $_
 | 
						|
                if $n >= $found_marker;
 | 
						|
            ++$n;
 | 
						|
        }
 | 
						|
    } else { # marker wasn't found; save all lines
 | 
						|
        foreach (@{$ra_lines}) {
 | 
						|
            push @save_lines, $_;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_above\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_below {                           # {{{1
 | 
						|
    my ($ra_lines, $marker, ) = @_;
 | 
						|
    print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        last if m{$marker};
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_below\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_below_above {                     # {{{1
 | 
						|
    my ($ra_lines, $marker_below, $marker_above, ) = @_;
 | 
						|
    # delete lines delimited by start and end line markers such
 | 
						|
    # as Perl POD documentation
 | 
						|
    print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $between    = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if (!$between and m{$marker_below}) {
 | 
						|
            $between    = 1;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($between and m{$marker_above}) {
 | 
						|
            $between    = 0;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        next if $between;
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_below_above\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_between {                         # {{{1
 | 
						|
    my ($ra_lines, $marker, ) = @_;
 | 
						|
    # $marker must contain one of the balanced pairs understood
 | 
						|
    # by Regexp::Common::balanced, namely
 | 
						|
    # '{}'  '()'  '[]'  or  '<>'
 | 
						|
 | 
						|
    print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    my %acceptable = ('{}'=>1,  '()'=>1,  '[]'=>1,  '<>'=>1, );
 | 
						|
    die "remove_between:  invalid delimiter '$marker'\n",
 | 
						|
        "the delimiter must be one of these four pairs:\n",
 | 
						|
        "{}  ()  []  <>\n" unless
 | 
						|
        $acceptable{$marker};
 | 
						|
 | 
						|
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
 | 
						|
 | 
						|
    my $all_lines = join("", @{$ra_lines});
 | 
						|
 | 
						|
    no strict 'vars';
 | 
						|
    # otherwise get:
 | 
						|
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
 | 
						|
    if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
 | 
						|
        no warnings;
 | 
						|
        $all_lines =~ s/$1//g;
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", split(/\n/, $all_lines)), "]\n" if $opt_v > 4;
 | 
						|
    print "<- remove_between\n" if $opt_v > 2;
 | 
						|
    return split("\n", $all_lines);
 | 
						|
} # 1}}}
 | 
						|
sub rm_comments_in_strings {                 # {{{1
 | 
						|
    my ($ra_lines, $string_marker, $start_comment, $end_comment, $multiline_mode) = @_;
 | 
						|
    $multiline_mode = 0 if not defined $multiline_mode;
 | 
						|
    # Replace comments within strings with 'xx'.
 | 
						|
 | 
						|
    print "-> rm_comments_in_strings(string_marker=$string_marker, " .
 | 
						|
          "start_comment=$start_comment, end_comment=$end_comment)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_ml_string = 0;
 | 
						|
    foreach my $line (@{$ra_lines}) {
 | 
						|
       #print "line=[$line]\n";
 | 
						|
        my $new_line = "";
 | 
						|
 | 
						|
        if ($line !~ /${string_marker}/) {
 | 
						|
            # short circuit; no strings on this line
 | 
						|
            if ( $in_ml_string ) {
 | 
						|
                $line =~ s/\Q${start_comment}\E/xx/g;
 | 
						|
                $line =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | 
						|
            }
 | 
						|
            push @save_lines, $line;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        # replace backslashed string markers with 'Q'
 | 
						|
        $line =~ s/\\${string_marker}/Q/g;
 | 
						|
 | 
						|
        if ( $in_ml_string and $line =~ /^(.*?)(${string_marker})(.*)$/ ) {
 | 
						|
            # A multiline string ends on this line. Process the part
 | 
						|
            # until the end of the multiline string first.
 | 
						|
            my ($lastpart_ml_string, $firstpart_marker, $rest_of_line )  = ($1, $2, $3);
 | 
						|
            $lastpart_ml_string =~ s/\Q${start_comment}\E/xx/g;
 | 
						|
            $lastpart_ml_string =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | 
						|
            $new_line = $lastpart_ml_string . $firstpart_marker;
 | 
						|
            $line = $rest_of_line;
 | 
						|
            $in_ml_string = 0;
 | 
						|
        }
 | 
						|
 | 
						|
        my @tokens = split(/(${string_marker}.*?${string_marker})/, $line);
 | 
						|
        foreach my $t (@tokens) {
 | 
						|
           #printf "  t0 = [$t]\n";
 | 
						|
            if ($t =~ /${string_marker}.*${string_marker}$/) {
 | 
						|
                # enclosed in quotes; process this token
 | 
						|
                $t =~ s/\Q${start_comment}\E/xx/g;
 | 
						|
                $t =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | 
						|
            }
 | 
						|
            elsif ( $multiline_mode and $t =~ /(${string_marker})/ ) {
 | 
						|
                # Unclosed quote present in line. If multiline_mode is enabled,
 | 
						|
                # consider it the start of a multiline string.
 | 
						|
                my $firstpart_marker = $1;
 | 
						|
                my @sub_token = split(/${string_marker}/, $t );
 | 
						|
 | 
						|
                if ( scalar @sub_token == 1 ) {
 | 
						|
                    # The line ends with a string marker that starts
 | 
						|
                    # a multiline string.
 | 
						|
                    $t = $sub_token[0] . $firstpart_marker;
 | 
						|
                    $in_ml_string = 1;
 | 
						|
                }
 | 
						|
                elsif ( scalar @sub_token == 2 ) {
 | 
						|
                    # The line has some more content after the string
 | 
						|
                    # marker that starts a multiline string
 | 
						|
                    $t = $sub_token[0] . $firstpart_marker;
 | 
						|
                    $sub_token[1] =~ s/\Q${start_comment}\E/xx/g;
 | 
						|
                    $sub_token[1] =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | 
						|
                    $t .= $sub_token[1];
 | 
						|
                    $in_ml_string = 1;
 | 
						|
                } else {
 | 
						|
                    print "Warning: rm_comments_in_string length \@sub_token > 2\n";
 | 
						|
                }
 | 
						|
 | 
						|
            }
 | 
						|
            #printf "  t1 = [$t]\n";
 | 
						|
            $new_line .= $t;
 | 
						|
        }
 | 
						|
        push @save_lines, $new_line;
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", @save_lines), "]\n" if $opt_v > 4;
 | 
						|
    print "<- rm_comments_in_strings\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_between_general {                 # {{{1
 | 
						|
    my ($ra_lines, $start_marker, $end_marker, ) = @_;
 | 
						|
    # Start and end markers may be any length strings.
 | 
						|
 | 
						|
    print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    my $all_lines = join("", @{$ra_lines});
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/\Q$start_marker\E.*?\Q$end_marker\E//g;  # strip one-line comments
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($in_comment) {
 | 
						|
            if (/\Q$end_marker\E/) {
 | 
						|
                s/^.*?\Q$end_marker\E//;
 | 
						|
                $in_comment = 0;
 | 
						|
            }
 | 
						|
            next if $in_comment;
 | 
						|
        }
 | 
						|
        next if /^\s*$/;
 | 
						|
        $in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
 | 
						|
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | 
						|
        if ($in_comment) {
 | 
						|
            # part code, part comment; strip the comment and keep the code
 | 
						|
            s/^(.*?)\Q$start_marker\E.*$/$1/;
 | 
						|
        }
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_between_general\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_between_regex   {                 # {{{1
 | 
						|
    my ($ra_lines, $start_RE, $end_RE, ) = @_;
 | 
						|
    # Start and end regex's may be any length strings.
 | 
						|
 | 
						|
    print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    my $all_lines = join("", @{$ra_lines});
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/${start_RE}.*?${end_RE}//g;  # strip one-line comments
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($in_comment) {
 | 
						|
            if (/$end_RE/) {
 | 
						|
                s/^.*?${end_RE}//;
 | 
						|
                $in_comment = 0;
 | 
						|
            }
 | 
						|
            next if $in_comment;
 | 
						|
        }
 | 
						|
        next if /^\s*$/;
 | 
						|
        $in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
 | 
						|
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | 
						|
        if ($in_comment) {
 | 
						|
            # part code, part comment; strip the comment and keep the code
 | 
						|
            s/^(.*?)${start_RE}.*$/$1/;
 | 
						|
        }
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_between_regex\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub replace_regex  {                         # {{{1
 | 
						|
    my ($ra_lines, $regex, $replace, ) = @_;
 | 
						|
 | 
						|
    print "-> replace_regex(regex=$regex, replace=$replace)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    my $all_lines = join("", @{$ra_lines});
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/${regex}/${replace}/g;
 | 
						|
        next if /^\s*$/;
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- replace_regex\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub replace_between_regex  {                 # {{{1
 | 
						|
    my ($ra_lines, $start_RE, $end_RE, $replace_RE, $multiline_mode ) = @_;
 | 
						|
    # If multiline_mode is enabled, $replace_RE should not refer
 | 
						|
    # to any captured groups in $start_RE.
 | 
						|
    $multiline_mode = 1 if not defined $multiline_mode;
 | 
						|
    # Start and end regex's may be any length strings.
 | 
						|
 | 
						|
    print "-> replace_between_regex(start=$start_RE, end=$end_RE, replace=$replace_RE)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    my $all_lines = join("", @{$ra_lines});
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/${start_RE}.*?${end_RE}/${replace_RE}/eeg;  # strip one-line comments
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($in_comment) {
 | 
						|
            if (/$end_RE/) {
 | 
						|
                s/^.*?${end_RE}/${replace_RE}/ee;
 | 
						|
                $in_comment = 0;
 | 
						|
            }
 | 
						|
            next if $in_comment;
 | 
						|
        }
 | 
						|
        next if /^\s*$/;
 | 
						|
        $in_comment = 1 if $multiline_mode and /^(.*?)${start_RE}/ ; # $1 may be blank or code
 | 
						|
        next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | 
						|
        if ($in_comment) {
 | 
						|
            # part code, part comment; strip the comment and keep the code
 | 
						|
            s/^(.*?)${start_RE}.*$/$1/;
 | 
						|
        }
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    print "<- replace_between_regex\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_cobol_blanks {                    # {{{1
 | 
						|
    # subroutines derived from SLOCCount
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    my $free_format = 0;  # Support "free format" source code.
 | 
						|
    my @save_lines  = ();
 | 
						|
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        next if m/^\s*$/;
 | 
						|
        my $line = expand($_);  # convert tabs to equivalent spaces
 | 
						|
        $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
 | 
						|
        if ($free_format) {
 | 
						|
            push @save_lines, $_;
 | 
						|
        } else {
 | 
						|
            # Greg Toth:
 | 
						|
            #  (1) Treat lines with any alphanum in cols 1-6 and
 | 
						|
            #      blanks in cols 7 through 71 as blank line, and
 | 
						|
            #  (2) Treat lines with any alphanum in cols 1-6 and
 | 
						|
            #      slash (/) in col 7 as blank line (this is a
 | 
						|
            #      page eject directive).
 | 
						|
            push @save_lines, $_ unless m/^\d{6}\s*$/             or
 | 
						|
                                        ($line =~ m/^.{6}\s{66}/) or
 | 
						|
                                        ($line =~ m/^......\//);
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_cobol_comments {                  # {{{1
 | 
						|
    # subroutines derived from SLOCCount
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    my $free_format = 0;  # Support "free format" source code.
 | 
						|
    my @save_lines  = ();
 | 
						|
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
 | 
						|
        if ($free_format) {
 | 
						|
            push @save_lines, $_ unless m{^\s*\*};
 | 
						|
        } else {
 | 
						|
            push @save_lines, $_ unless m{^......\*} or m{^\*};
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_jcl_comments {                    # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_jcl_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        next if /^\s*$/;
 | 
						|
        next if m{^//\*};
 | 
						|
        last if m{^\s*//\s*$};
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_jcl_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_jsp_comments {                    # {{{1
 | 
						|
    #  JSP comment is   <%--  body of comment   --%>
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_jsp_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($in_comment) {
 | 
						|
            if (/\-\-\%>/) {
 | 
						|
                s/^.*?\-\-\%>//;
 | 
						|
                $in_comment = 0;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        next if /^\s*$/;
 | 
						|
        $in_comment = 1 if /^(.*?)<\%\-\-/;
 | 
						|
        next if defined $1 and $1 =~ /^\s*$/;
 | 
						|
        next if ($in_comment);
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_jsp_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_html_comments {                   # {{{1
 | 
						|
    #  HTML comment is   <!--  body of comment   -->
 | 
						|
    #  Need to use my own routine until the HTML comment regex in
 | 
						|
    #  the Regexp::Common module can handle  <!--  --  -->
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_html_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        s/<!\-\-.*?\-\->//g;  # strip one-line comments
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($in_comment) {
 | 
						|
            if (/\-\->/) {
 | 
						|
                s/^.*?\-\->//;
 | 
						|
                $in_comment = 0;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        next if /^\s*$/;
 | 
						|
        $in_comment = 1 if /^(.*?)<!\-\-/;
 | 
						|
        if (defined $1 and $1 !~ /^\s*$/) {
 | 
						|
            # has both code and comment
 | 
						|
            push @save_lines, "$1\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        next if ($in_comment);
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_html_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_bf_comments {                     # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_bf_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        s/[^<>+-.,\[\]]+//g;
 | 
						|
        next if /^\s*$/;
 | 
						|
        push @save_lines, $_;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_bf_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub really_is_bf {                           # {{{1
 | 
						|
    my ($file, ) = @_;
 | 
						|
 | 
						|
    print "-> really_is_bf\n" if $opt_v > 2;
 | 
						|
    my $n_bf_indicators  = 0;
 | 
						|
    my @lines = read_file($file);
 | 
						|
    foreach my $L (@lines) {
 | 
						|
        my $ind = 0;
 | 
						|
        if ($L =~ /([+-]{4,}  |          # at least four +'s or -'s in a row
 | 
						|
                   [\[\]]{4,} |          # at least four [ or ] in a row
 | 
						|
                   [<>][+-]   |          # >- or >+ or <+ or <-
 | 
						|
                   <{3,}      |          # at least three < in a row
 | 
						|
                   ^\s*[\[\]]\s*$)/x) {  # [ or ] on line by itself
 | 
						|
            ++$n_bf_indicators;
 | 
						|
            $ind = 1;
 | 
						|
        }
 | 
						|
        # if ($ind) { print "YES: $L"; } else { print "NO : $L"; }
 | 
						|
    }
 | 
						|
    my $ratio = scalar(@lines) > 0 ? $n_bf_indicators / scalar(@lines) : 0;
 | 
						|
    my $decision = ($ratio > 0.5) || ($n_bf_indicators > 5);
 | 
						|
    printf "<- really_is_bf(Y/N=%d %s, R=%.3f, N=%d)\n",
 | 
						|
            $decision, $file, $ratio, $n_bf_indicators if $opt_v > 2;
 | 
						|
    return $decision;
 | 
						|
} # 1}}}
 | 
						|
sub remove_indented_block {                  # {{{1
 | 
						|
    # Haml block comments are defined by a silent comment marker like
 | 
						|
    #    /
 | 
						|
    # or
 | 
						|
    #    -#
 | 
						|
    # followed by indented text on subsequent lines.
 | 
						|
    # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
 | 
						|
    my ($ra_lines, $regex, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_indented_block\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        next if /^\s*$/;
 | 
						|
        my $line = expand($_);  # convert tabs to equivalent spaces
 | 
						|
        if ($in_comment) {
 | 
						|
            $line =~ /^(\s*)/;
 | 
						|
            # print "indent=", length $1, "\n";
 | 
						|
            if (length $1 < $in_comment) {
 | 
						|
                # indent level is less than comment level
 | 
						|
                # are back in code
 | 
						|
                $in_comment = 0;
 | 
						|
            } else {
 | 
						|
                # still in comments, don't use this line
 | 
						|
                next;
 | 
						|
            }
 | 
						|
        } elsif ($line =~ m{$regex}) {
 | 
						|
            if ($1) {
 | 
						|
                $in_comment = length($1) + 1; # number of leading spaces + 1
 | 
						|
            } else {
 | 
						|
                $in_comment = 1;
 | 
						|
            }
 | 
						|
            # print "in_comment=$in_comment\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        push @save_lines, $line;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_indented_block\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_haml_block {                      # {{{1
 | 
						|
    # Haml block comments are defined by a silent comment marker like
 | 
						|
    #    /
 | 
						|
    # or
 | 
						|
    #    -#
 | 
						|
    # followed by indented text on subsequent lines.
 | 
						|
    # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    return remove_indented_block($ra_lines, '^(\s*)(/|-#)\s*$');
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub remove_pug_block {                       # {{{1
 | 
						|
    # Haml block comments are defined by a silent comment marker like
 | 
						|
    #    //
 | 
						|
    # followed by indented text on subsequent lines.
 | 
						|
    # http://jade-lang.com/reference/comments/
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    return remove_indented_block($ra_lines, '^(\s*)(//)\s*$');
 | 
						|
} # 1}}}
 | 
						|
sub remove_OCaml_comments {                  # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_OCaml_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;   # counter to depth of nested comments
 | 
						|
    foreach my $L (@{$ra_lines}) {
 | 
						|
        next if $L =~ /^\s*$/;
 | 
						|
        # make an array of tokens where a token is a start comment
 | 
						|
        # marker, end comment marker, string, or anything else
 | 
						|
        my $clean_line = ""; # ie, free of comments
 | 
						|
        my @tokens = split(/(\(\*|\*\)|".*?")/, $L);
 | 
						|
        foreach my $t (@tokens) {
 | 
						|
            next unless $t;
 | 
						|
            if      ($t eq "(*") {
 | 
						|
                ++$in_comment;
 | 
						|
            } elsif ($t eq "*)") {
 | 
						|
                --$in_comment;
 | 
						|
            } elsif (!$in_comment) {
 | 
						|
                $clean_line .= $t;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        push @save_lines, $clean_line if $clean_line;
 | 
						|
    }
 | 
						|
    print "<- remove_OCaml_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_TLAPlus_generated_code {          # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # If --no-autogen, remove code created by the PlusCal translator.
 | 
						|
    return @{$ra_lines} if !$opt_no_autogen;
 | 
						|
    return remove_between_regex($ra_lines, '^\\\\\\* BEGIN TRANSLATION\b', '^\\\\\\* END TRANSLATION\b');
 | 
						|
} # 1}}}
 | 
						|
sub remove_TLAPlus_comments {                # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # TLA+ block comments are like OCaml comments (remove_OCaml_comments).
 | 
						|
    # However, TLA+ comments may contain PlusCal code, and we must consider the
 | 
						|
    # PlusCal code as code, not comments.
 | 
						|
 | 
						|
    print "-> remove_TLAPlus_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;        # counter to depth of nested (* *) comments
 | 
						|
    my $in_pluscal_head = 0;   # whether we saw the start of PlusCal code
 | 
						|
    my $in_pluscal_block = 0;  # counter to depth of nested { } blocks
 | 
						|
    my $saved_in_comment = 0;  # $in_comment before we entered PlusCal code
 | 
						|
    foreach my $L (@{$ra_lines}) {
 | 
						|
        next if $L =~ /^\s*$/;
 | 
						|
        # make an array of interesting tokens we need to act upon
 | 
						|
        my $clean_line = ""; # ie, free of comments
 | 
						|
        my @tokens = split(/(\(\*|\*\)|".*?"|[{}]|--fair\b|--algorithm|PlusCal options)/, $L);
 | 
						|
        foreach my $t (@tokens) {
 | 
						|
            next unless $t;
 | 
						|
            if      ($t eq "(*") {
 | 
						|
                ++$in_comment;
 | 
						|
            } elsif ($t eq "*)") {
 | 
						|
                --$in_comment;
 | 
						|
            } elsif ($t eq "{" && $in_pluscal_head) {
 | 
						|
                # start block matching when we see '{' in '--algorithm NAME {'
 | 
						|
                $in_pluscal_head = 0;
 | 
						|
                $in_pluscal_block = 1;
 | 
						|
                $saved_in_comment = $in_comment;
 | 
						|
                $in_comment = 0;
 | 
						|
                $clean_line .= $t;
 | 
						|
            } elsif ($t eq "{" && $in_pluscal_block && !$in_comment) {
 | 
						|
                ++$in_pluscal_block;
 | 
						|
                $clean_line .= $t;
 | 
						|
            } elsif ($t eq "}" && $in_pluscal_block && !$in_comment) {
 | 
						|
                --$in_pluscal_block;
 | 
						|
                if ($in_pluscal_block == 0) {
 | 
						|
                    $in_comment = $saved_in_comment;
 | 
						|
                }
 | 
						|
                $clean_line .= $t;
 | 
						|
            } elsif ($t eq "--fair" || $t eq "--algorithm") {
 | 
						|
                $in_pluscal_head = 1;
 | 
						|
                $clean_line .= $t;
 | 
						|
            } elsif (!$in_comment || $in_pluscal_head || $in_pluscal_block || $t eq "PlusCal options") {
 | 
						|
                $clean_line .= $t;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        push @save_lines, $clean_line if $clean_line;
 | 
						|
    }
 | 
						|
    print "<- remove_TLAPlus_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub remove_slim_block {                      # {{{1
 | 
						|
    # slim comments start with /
 | 
						|
    # followed by indented text on subsequent lines.
 | 
						|
    # http://www.rubydoc.info/gems/slim/frames
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    return remove_indented_block($ra_lines, '^(\s*)(/[^!])');
 | 
						|
} # 1}}}
 | 
						|
sub add_newlines {                           # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    print "-> add_newlines \n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
 | 
						|
        push @save_lines, "$_\n";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- add_newlines \n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub docstring_to_C {                         # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Converts Python docstrings to C comments.
 | 
						|
 | 
						|
    if ($opt_docstring_as_code) {
 | 
						|
        return @{$ra_lines};
 | 
						|
    }
 | 
						|
 | 
						|
    print "-> docstring_to_C()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_docstring = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        while (/((""")|('''))/) {
 | 
						|
            if (!$in_docstring) {
 | 
						|
                s{[uU]?((""")|('''))}{/*};
 | 
						|
                $in_docstring = 1;
 | 
						|
            } else {
 | 
						|
                s{((""")|('''))}{*/};
 | 
						|
                $in_docstring = 0;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- docstring_to_C\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub docstring_rm_comments {                  # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Remove embedded C/C++ style comments in docstrings.
 | 
						|
 | 
						|
    print "-> docstring_rm_comments()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_docstring = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if (/((""")|('''))(.*?)\1/) {
 | 
						|
            # single line docstring
 | 
						|
            my ($i_start, $i_end) = ($-[0]+3, $+[0]-3);
 | 
						|
            # replace /*, */, // with xx
 | 
						|
            substr($_, $i_start, $i_end-$i_start) =~ s{(/\*|\*/|//)}{xx}g;
 | 
						|
            next;
 | 
						|
        } elsif (m{/\*.*?((""")|(''')).*?\*/}) {
 | 
						|
            # docstring start or end within /* */ comments
 | 
						|
            my $i_start = $-[0]+2;
 | 
						|
            substr($_, $i_start, 3) = "xxx";
 | 
						|
        } elsif (m{//.*?((""")|('''))}) {
 | 
						|
            # docstring start or end after //
 | 
						|
            my $i_start = $-[0]+2;
 | 
						|
            substr($_, $i_start, 3) = "xxx";
 | 
						|
        } elsif (/^(.*?)((""")|('''))/ and  $in_docstring) {
 | 
						|
            $in_docstring = 0;
 | 
						|
            my $i_end = length $1;
 | 
						|
            if ($i_end) {
 | 
						|
                substr($_, 0, $i_end) =~ s{(/\*|\*/|//)}{xx}g;
 | 
						|
            }
 | 
						|
        } elsif (/((""")|('''))(.*?)$/ and !$in_docstring) {
 | 
						|
            $in_docstring = 1;
 | 
						|
            my $i_start = $-[0]+3;
 | 
						|
            substr($_, $i_start) =~ s{(/\*|\*/|//)}{xx}g;
 | 
						|
        } elsif ($in_docstring) {
 | 
						|
            s{(/\*|\*/|//)}{xx}g;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    print "<- docstring_rm_comments\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub jupyter_nb {                             # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Translate .ipynb file content into an equivalent set of code
 | 
						|
    # lines as expected by cloc.
 | 
						|
 | 
						|
    print "-> jupyter_nb()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_code   = 0;
 | 
						|
    my $in_source = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if (!$in_code and !$in_source and /^\s*"cell_type":\s*"code",\s*$/) {
 | 
						|
            $in_code = 1;
 | 
						|
        } elsif ($in_code and !$in_source and /^\s*"source":\s*\[\s*$/) {
 | 
						|
            $in_source = 1;
 | 
						|
        } elsif ($in_code and $in_source) {
 | 
						|
            if (/^\s*"\s*\\n",\s*$/) {    #  "\n",  -> empty line
 | 
						|
                next;
 | 
						|
            } elsif (/^\s*"\s*#/) {       #  comment within the code block
 | 
						|
                next;
 | 
						|
            } elsif (/^\s*\]\s*$/) {
 | 
						|
                $in_code   = 0;
 | 
						|
                $in_source = 0;
 | 
						|
            } else {
 | 
						|
                push @save_lines, $_;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- jupyter_nb\n" if $opt_v > 2;
 | 
						|
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub elixir_doc_to_C {                        # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Converts Elixir docs to C comments.
 | 
						|
 | 
						|
    print "-> elixir_doc_to_C()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_docstring = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        if (!$in_docstring && /(\@(module)?doc\s+(~[sScC])?['"]{3})/) {
 | 
						|
            s{$1}{/*};
 | 
						|
            $in_docstring = 1;
 | 
						|
        } elsif ($in_docstring && /(['"]{3})/) {
 | 
						|
            s{$1}{*/};
 | 
						|
            $in_docstring = 0;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- elixir_doc_to_C\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub Forth_paren_to_C  {                      # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Converts Forth comment parentheses to C comments.
 | 
						|
 | 
						|
    print "-> Forth_paren_to_C()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_comment = 0;
 | 
						|
    my $max_paren_pair_per_line = 255;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
#print "Forth_paren_to_C: [$_]\n";
 | 
						|
        my $n_iter = 0;
 | 
						|
        while (/\s\(\s/ or ($in_comment and /\)/)) {
 | 
						|
#print "TOP n_iter=$n_iter in_comment=$in_comment\n";
 | 
						|
            if (/\s\(\s.*?\)/) {
 | 
						|
                # in-line parenthesis comment; handle here
 | 
						|
                s/\s+\(\s+.*?\)//g;
 | 
						|
#print "B\n";
 | 
						|
            } elsif (!$in_comment and /\s\(\s/) {
 | 
						|
                s{\s+\(\s+}{/*};
 | 
						|
#print "C\n";
 | 
						|
                $in_comment = 1;
 | 
						|
            } elsif ($in_comment and /\)/) {
 | 
						|
                s{\)}{*/};
 | 
						|
#print "D\n";
 | 
						|
                $in_comment = 0;
 | 
						|
            } else {
 | 
						|
                # gets here if it can't find a matching
 | 
						|
                # close parenthesis; in this case the
 | 
						|
                # results will likely be incorrect
 | 
						|
                ++$n_iter;
 | 
						|
#print "E\n";
 | 
						|
                last if $n_iter > $max_paren_pair_per_line;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Forth_paren_to_C\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub powershell_to_C {                        # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Converts PowerShell block comment markers to C comments.
 | 
						|
 | 
						|
    print "-> powershell_to_C()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $in_docstring = 0;
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        s{<#}{/*}g;
 | 
						|
        s{#>}{*/}g;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- powershell_to_C\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub smarty_to_C {                            # {{{1
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    # Converts Smarty comments to C comments.
 | 
						|
 | 
						|
    print "-> smarty_to_C()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        s[{\*][/*]g;
 | 
						|
        s[\*}][*/]g;
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- smarty_to_C\n" if $opt_v > 2;
 | 
						|
    return @{$ra_lines};
 | 
						|
} # 1}}}
 | 
						|
sub determine_lit_type {                     # {{{1
 | 
						|
  my ($file) = @_;
 | 
						|
 | 
						|
  my $FILE = open_file('<', $file, 0);
 | 
						|
  while (<$FILE>) {
 | 
						|
    if (m/^\\begin\{code\}/) { close $FILE; return 2; }
 | 
						|
    if (m/^>\s/) { close $FILE; return 1; }
 | 
						|
  }
 | 
						|
 | 
						|
  return 0;
 | 
						|
} # 1}}}
 | 
						|
sub remove_haskell_comments {                # {{{1
 | 
						|
    # SLOCCount's haskell_count script with modifications to handle
 | 
						|
    # Elm empty and nested block comments.
 | 
						|
    # Strips out {- .. -} and -- comments and counts the rest.
 | 
						|
    # Pragmas, {-#...}, are counted as SLOC.
 | 
						|
    # BUG: Doesn't handle strings with embedded block comment markers gracefully.
 | 
						|
    #      In practice, that shouldn't be a problem.
 | 
						|
    my ($ra_lines, $file, ) = @_;
 | 
						|
 | 
						|
    print "-> remove_haskell_comments\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @save_lines = ();
 | 
						|
    my $in_comment = 0;
 | 
						|
    my $incomment  = 0;
 | 
						|
    my $inlitblock = 0;
 | 
						|
    my $literate   = 0;
 | 
						|
    my $is_elm     = 0;
 | 
						|
 | 
						|
    $is_elm   = 1 if $file =~ /\.elm$/;
 | 
						|
    $literate = 1 if $file =~ /\.lhs$/;
 | 
						|
    if ($literate) { $literate = determine_lit_type($file) }
 | 
						|
 | 
						|
    foreach (@{$ra_lines}) {
 | 
						|
        chomp;
 | 
						|
        if ($literate == 1) {
 | 
						|
            if (!s/^>//) { s/.*//; }
 | 
						|
        } elsif ($literate == 2) {
 | 
						|
            if ($inlitblock) {
 | 
						|
                if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
 | 
						|
            } elsif (!$inlitblock) {
 | 
						|
                if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
 | 
						|
                else { s/.*//; }
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        # keep pragmas
 | 
						|
        if (/^\s*{-#/) {
 | 
						|
            push @save_lines, $_;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        # Elm allows nested comments so track nesting depth
 | 
						|
        # with $incomment.
 | 
						|
 | 
						|
        my $n_open  = () = $_ =~ /{-/g;
 | 
						|
        my $n_close = () = $_ =~ /-}/g;
 | 
						|
        s/{-.*?-}//g;
 | 
						|
 | 
						|
        if ($incomment) {
 | 
						|
            if (m/\-\}/) {
 | 
						|
                s/^.*?\-\}//;
 | 
						|
                if ($is_elm) {
 | 
						|
                    $incomment += $n_open - $n_close;
 | 
						|
                } else {
 | 
						|
                    $incomment = 0;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                s/.*//;
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            s/--.*//;
 | 
						|
            if (m/{-/ && (!m/{-#/)) {
 | 
						|
                s/{-.*//;
 | 
						|
                if ($is_elm) {
 | 
						|
                    $incomment += $n_open - $n_close;
 | 
						|
                } else {
 | 
						|
                    $incomment = 1;
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (m/\S/) { push @save_lines, $_; }
 | 
						|
    }
 | 
						|
#   if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
 | 
						|
 | 
						|
    print "<- remove_haskell_comments\n" if $opt_v > 2;
 | 
						|
    return @save_lines;
 | 
						|
} # 1}}}
 | 
						|
sub print_lines {                            # {{{1
 | 
						|
    my ($file     , # in
 | 
						|
        $title    , # in
 | 
						|
        $ra_lines , # in
 | 
						|
       ) = @_;
 | 
						|
    printf "->%-30s %s\n", $file, $title;
 | 
						|
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
 | 
						|
        printf "%5d | %s", $i+1, $ra_lines->[$i];
 | 
						|
        print "\n" unless $ra_lines->[$i] =~ m{\n$}
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub set_constants {                          # {{{1
 | 
						|
    my ($rh_Language_by_Extension , # out
 | 
						|
        $rh_Language_by_Script    , # out
 | 
						|
        $rh_Language_by_File      , # out
 | 
						|
        $rh_Language_by_Prefix    , # out
 | 
						|
        $rhaa_Filters_by_Language , # out
 | 
						|
        $rh_Not_Code_Extension    , # out
 | 
						|
        $rh_Not_Code_Filename     , # out
 | 
						|
        $rh_Scale_Factor          , # out
 | 
						|
        $rh_Known_Binary_Archives , # out
 | 
						|
        $rh_EOL_continuation_re   , # out
 | 
						|
       ) = @_;
 | 
						|
# 1}}}
 | 
						|
%{$rh_Language_by_Extension} = (             # {{{1
 | 
						|
            'abap'        => 'ABAP'                  ,
 | 
						|
            'ac'          => 'm4'                    ,
 | 
						|
            'ada'         => 'Ada'                   ,
 | 
						|
            'adb'         => 'Ada'                   ,
 | 
						|
            'ads'         => 'Ada'                   ,
 | 
						|
            'adso'        => 'ADSO/IDSM'             ,
 | 
						|
            'ahkl'        => 'AutoHotkey'            ,
 | 
						|
            'ahk'         => 'AutoHotkey'            ,
 | 
						|
            'agda'        => 'Agda'                  ,
 | 
						|
            'lagda'       => 'Agda'                  ,
 | 
						|
            'aj'          => 'AspectJ'               ,
 | 
						|
            'am'          => 'make'                  ,
 | 
						|
            'ample'       => 'AMPLE'                 ,
 | 
						|
            'apl'         => 'APL'                   ,
 | 
						|
            'apla'        => 'APL'                   ,
 | 
						|
            'aplf'        => 'APL'                   ,
 | 
						|
            'aplo'        => 'APL'                   ,
 | 
						|
            'apln'        => 'APL'                   ,
 | 
						|
            'aplc'        => 'APL'                   ,
 | 
						|
            'apli'        => 'APL'                   ,
 | 
						|
            'applescript' => 'AppleScript'           ,
 | 
						|
            'dyalog'      => 'APL'                   ,
 | 
						|
            'dyapp'       => 'APL'                   ,
 | 
						|
            'mipage'      => 'APL'                   ,
 | 
						|
            'art'         => 'Arturo'                ,
 | 
						|
            'as'          => 'ActionScript'          ,
 | 
						|
            'adoc'        => 'AsciiDoc'              ,
 | 
						|
            'asciidoc'    => 'AsciiDoc'              ,
 | 
						|
            'dofile'      => 'AMPLE'                 ,
 | 
						|
            'startup'     => 'AMPLE'                 ,
 | 
						|
            'axd'         => 'ASP'                   ,
 | 
						|
            'ashx'        => 'ASP'                   ,
 | 
						|
            'asa'         => 'ASP'                   ,
 | 
						|
            'asax'        => 'ASP.NET'               ,
 | 
						|
            'ascx'        => 'ASP.NET'               ,
 | 
						|
            'asd'         => 'Lisp'                  , # system definition file
 | 
						|
            'asmx'        => 'ASP.NET'               ,
 | 
						|
            'asp'         => 'ASP'                   ,
 | 
						|
            'aspx'        => 'ASP.NET'               ,
 | 
						|
            'master'      => 'ASP.NET'               ,
 | 
						|
            'sitemap'     => 'ASP.NET'               ,
 | 
						|
            'nasm'        => 'Assembly'              ,
 | 
						|
            'a51'         => 'Assembly'              ,
 | 
						|
            'asm'         => 'Assembly'              ,
 | 
						|
            'astro'       => 'Astro'                 ,
 | 
						|
            'asy'         => 'Asymptote'             ,
 | 
						|
            'cshtml'      => 'Razor'                 ,
 | 
						|
            'razor'       => 'Razor'                 , # Client-side Blazor
 | 
						|
            'nawk'        => 'awk'                   ,
 | 
						|
            'mawk'        => 'awk'                   ,
 | 
						|
            'gawk'        => 'awk'                   ,
 | 
						|
            'auk'         => 'awk'                   ,
 | 
						|
            'awk'         => 'awk'                   ,
 | 
						|
            'bash'        => 'Bourne Again Shell'    ,
 | 
						|
            'bazel'       => 'Starlark'              ,
 | 
						|
            'BUILD'       => 'Bazel'                 ,
 | 
						|
            'dxl'         => 'DOORS Extension Language',
 | 
						|
            'bat'         => 'DOS Batch'             ,
 | 
						|
            'BAT'         => 'DOS Batch'             ,
 | 
						|
            'cmd'         => 'DOS Batch'             ,
 | 
						|
            'CMD'         => 'DOS Batch'             ,
 | 
						|
            'btm'         => 'DOS Batch'             ,
 | 
						|
            'BTM'         => 'DOS Batch'             ,
 | 
						|
            'blade'       => 'Blade'                 ,
 | 
						|
            'blade.php'   => 'Blade'                 ,
 | 
						|
            'build.xml'   => 'Ant'                   ,
 | 
						|
            'b'           => 'Brainfuck'             ,
 | 
						|
            'bf'          => 'Brainfuck'             ,
 | 
						|
            'brs'         => 'BrightScript'          ,
 | 
						|
            'bzl'         => 'Starlark'              ,
 | 
						|
            'btp'         => 'BizTalk Pipeline'      ,
 | 
						|
            'odx'         => 'BizTalk Orchestration' ,
 | 
						|
            'carbon'      => 'Carbon'                ,
 | 
						|
            'cpy'         => 'COBOL'                 ,
 | 
						|
            'cobol'       => 'COBOL'                 ,
 | 
						|
            'ccp'         => 'COBOL'                 ,
 | 
						|
            'cbl'         => 'COBOL'                 ,
 | 
						|
            'CBL'         => 'COBOL'                 ,
 | 
						|
            'idc'         => 'C'                     ,
 | 
						|
            'cats'        => 'C'                     ,
 | 
						|
            'c'           => 'C'                     ,
 | 
						|
            'c++'         => 'C++'                   ,
 | 
						|
            'C'           => 'C++'                   ,
 | 
						|
            'cc'          => 'C++'                   ,
 | 
						|
            'ccm'         => 'C++'                   ,
 | 
						|
            'c++m'        => 'C++'                   ,
 | 
						|
            'cppm'        => 'C++'                   ,
 | 
						|
            'cxxm'        => 'C++'                   ,
 | 
						|
            'h++'         => 'C++'                   ,
 | 
						|
            'inl'         => 'C++'                   ,
 | 
						|
            'ipp'         => 'C++'                   ,
 | 
						|
            'ixx'         => 'C++'                   ,
 | 
						|
            'tcc'         => 'C++'                   ,
 | 
						|
            'tpp'         => 'C++'                   ,
 | 
						|
            'ccs'         => 'CCS'                   ,
 | 
						|
            'cfc'         => 'ColdFusion CFScript'   ,
 | 
						|
            'cfml'        => 'ColdFusion'            ,
 | 
						|
            'cfm'         => 'ColdFusion'            ,
 | 
						|
            'chpl'        => 'Chapel'                ,
 | 
						|
            'cl'          => 'Lisp/OpenCL'           ,
 | 
						|
            'riemann.config'=> 'Clojure'               ,
 | 
						|
            'hic'         => 'Clojure'               ,
 | 
						|
            'cljx'        => 'Clojure'               ,
 | 
						|
            'cljscm'      => 'Clojure'               ,
 | 
						|
            'cljs.hl'     => 'Clojure'               ,
 | 
						|
            'cl2'         => 'Clojure'               ,
 | 
						|
            'boot'        => 'Clojure'               ,
 | 
						|
            'clj'         => 'Clojure'               ,
 | 
						|
            'cljs'        => 'ClojureScript'         ,
 | 
						|
            'cljc'        => 'ClojureC'              ,
 | 
						|
            'cls'         => 'Visual Basic/TeX/Apex Class' ,
 | 
						|
            'cmake.in'    => 'CMake'                 ,
 | 
						|
            'CMakeLists.txt' => 'CMake'              ,
 | 
						|
            'cmake'       => 'CMake'                 ,
 | 
						|
            'cob'         => 'COBOL'                 ,
 | 
						|
            'COB'         => 'COBOL'                 ,
 | 
						|
            'cocoa5'      => 'CoCoA 5'               ,
 | 
						|
            'c5'          => 'CoCoA 5'               ,
 | 
						|
            'cpkg5'       => 'CoCoA 5'               ,
 | 
						|
            'cocoa5server'=> 'CoCoA 5'               ,
 | 
						|
            'iced'        => 'CoffeeScript'          ,
 | 
						|
            'cjsx'        => 'CoffeeScript'          ,
 | 
						|
            'cakefile'    => 'CoffeeScript'          ,
 | 
						|
            '_coffee'     => 'CoffeeScript'          ,
 | 
						|
            'coffee'      => 'CoffeeScript'          ,
 | 
						|
            'component'   => 'Visualforce Component' ,
 | 
						|
            'cg3'         => 'Constraint Grammar'    ,
 | 
						|
            'rlx'         => 'Constraint Grammar'    ,
 | 
						|
            'Containerfile'  => 'Containerfile'      ,
 | 
						|
            'cpp'         => 'C++'                   ,
 | 
						|
            'CPP'         => 'C++'                   ,
 | 
						|
            'cr'          => 'Crystal'               ,
 | 
						|
            'cs'          => 'C#/Smalltalk'          ,
 | 
						|
            'designer.cs' => 'C# Designer'           ,
 | 
						|
            'cake'        => 'Cake Build Script'     ,
 | 
						|
            'csh'         => 'C Shell'               ,
 | 
						|
            'cson'        => 'CSON'                  ,
 | 
						|
            'css'         => "CSS"                   ,
 | 
						|
            'csv'         => "CSV"                   ,
 | 
						|
            'cu'          => 'CUDA'                  ,
 | 
						|
            'cuh'         => 'CUDA'                  , # CUDA header file
 | 
						|
            'cxx'         => 'C++'                   ,
 | 
						|
            'd'           => 'D/dtrace'              ,
 | 
						|
# in addition, .d can map to init.d files typically written as
 | 
						|
# bash or sh scripts
 | 
						|
            'dfy'         => 'Dafny'                 ,
 | 
						|
            'da'          => 'DAL'                   ,
 | 
						|
            'dart'        => 'Dart'                  ,
 | 
						|
            'dsc'         => 'DenizenScript'         ,
 | 
						|
            'derw'        => 'Derw'                  ,
 | 
						|
            'def'         => 'Windows Module Definition',
 | 
						|
            'dhall'       => 'dhall'                 ,
 | 
						|
            'dt'          => 'DIET'                  ,
 | 
						|
            'patch'       => 'diff'                  ,
 | 
						|
            'diff'        => 'diff'                  ,
 | 
						|
            'dmap'        => 'NASTRAN DMAP'          ,
 | 
						|
            'sthlp'       => 'Stata'                 ,
 | 
						|
            'matah'       => 'Stata'                 ,
 | 
						|
            'mata'        => 'Stata'                 ,
 | 
						|
            'ihlp'        => 'Stata'                 ,
 | 
						|
            'doh'         => 'Stata'                 ,
 | 
						|
            'ado'         => 'Stata'                 ,
 | 
						|
            'do'          => 'Stata'                 ,
 | 
						|
            'DO'          => 'Stata'                 ,
 | 
						|
            'Dockerfile'  => 'Dockerfile'            ,
 | 
						|
            'dockerfile'  => 'Dockerfile'            ,
 | 
						|
            'pascal'      => 'Pascal'                ,
 | 
						|
            'lpr'         => 'Pascal'                ,
 | 
						|
            'dfm'         => 'Delphi Form'           ,
 | 
						|
            'dpr'         => 'Pascal'                ,
 | 
						|
            'dita'        => 'DITA'                  ,
 | 
						|
            'drl'         => 'Drools'                ,
 | 
						|
            'dtd'         => 'DTD'                   ,
 | 
						|
            'ec'          => 'C'                     ,
 | 
						|
            'ecpp'        => 'ECPP'                  ,
 | 
						|
            'eex'         => 'EEx'                   ,
 | 
						|
            'el'          => 'Lisp'                  ,
 | 
						|
            'elm'         => 'Elm'                   ,
 | 
						|
            'exs'         => 'Elixir'                ,
 | 
						|
            'ex'          => 'Elixir'                ,
 | 
						|
            'ecr'         => 'Embedded Crystal'      ,
 | 
						|
            'ejs'         => 'EJS'                   ,
 | 
						|
            'erb'         => 'ERB'                   ,
 | 
						|
            'ERB'         => 'ERB'                   ,
 | 
						|
            'ets'         => 'ArkTs'                 , # OpenHarmonyOS app language
 | 
						|
            'yrl'         => 'Erlang'                ,
 | 
						|
            'xrl'         => 'Erlang'                ,
 | 
						|
            'rebar.lock'  => 'Erlang'                ,
 | 
						|
            'rebar.config.lock'=> 'Erlang'           ,
 | 
						|
            'rebar.config'=> 'Erlang'                ,
 | 
						|
            'emakefile'   => 'Erlang'                ,
 | 
						|
            'app.src'     => 'Erlang'                ,
 | 
						|
            'erl'         => 'Erlang'                ,
 | 
						|
            'exp'         => 'Expect'                ,
 | 
						|
            '4th'         => 'Forth'                 ,
 | 
						|
            'fish'        => 'Fish Shell'            ,
 | 
						|
            'fsl'         => 'Finite State Language' ,
 | 
						|
            'jssm'        => 'Finite State Language' ,
 | 
						|
            'fnl'         => 'Fennel'                ,
 | 
						|
            'forth'       => 'Forth'                 ,
 | 
						|
            'fr'          => 'Forth'                 ,
 | 
						|
            'frt'         => 'Forth'                 ,
 | 
						|
            'fth'         => 'Forth'                 ,
 | 
						|
            'f83'         => 'Forth'                 ,
 | 
						|
            'fb'          => 'Forth'                 ,
 | 
						|
            'fpm'         => 'Forth'                 ,
 | 
						|
            'e4'          => 'Forth'                 ,
 | 
						|
            'rx'          => 'Forth'                 ,
 | 
						|
            'ft'          => 'Forth'                 ,
 | 
						|
            'f77'         => 'Fortran 77'            ,
 | 
						|
            'F77'         => 'Fortran 77'            ,
 | 
						|
            'f90'         => 'Fortran 90'            ,
 | 
						|
            'F90'         => 'Fortran 90'            ,
 | 
						|
            'f95'         => 'Fortran 95'            ,
 | 
						|
            'F95'         => 'Fortran 95'            ,
 | 
						|
            'f'           => 'Fortran 77/Forth'      ,
 | 
						|
            'F'           => 'Fortran 77'            ,
 | 
						|
            'for'         => 'Fortran 77/Forth'      ,
 | 
						|
            'FOR'         => 'Fortran 77'            ,
 | 
						|
            'ftl'         => 'Freemarker Template'   ,
 | 
						|
            'ftn'         => 'Fortran 77'            ,
 | 
						|
            'FTN'         => 'Fortran 77'            ,
 | 
						|
            'fmt'         => 'Oracle Forms'          ,
 | 
						|
            'focexec'     => 'Focus'                 ,
 | 
						|
            'fs'          => 'F#/Forth'              ,
 | 
						|
            'fsi'         => 'F#'                    ,
 | 
						|
            'fsx'         => 'F# Script'             ,
 | 
						|
            'fut'         => 'Futhark'               ,
 | 
						|
            'fxml'        => 'FXML'                  ,
 | 
						|
            'gnumakefile' => 'make'                  ,
 | 
						|
            'Gnumakefile' => 'make'                  ,
 | 
						|
            'gd'          => 'GDScript'              ,
 | 
						|
            'gdshader'    => 'Godot Shaders'         ,
 | 
						|
            'vshader'     => 'GLSL'                  ,
 | 
						|
            'vsh'         => 'GLSL'                  ,
 | 
						|
            'vrx'         => 'GLSL'                  ,
 | 
						|
            'gshader'     => 'GLSL'                  ,
 | 
						|
            'glslv'       => 'GLSL'                  ,
 | 
						|
            'geo'         => 'GLSL'                  ,
 | 
						|
            'fshader'     => 'GLSL'                  ,
 | 
						|
            'fsh'         => 'GLSL'                  ,
 | 
						|
            'frg'         => 'GLSL'                  ,
 | 
						|
            'fp'          => 'GLSL'                  ,
 | 
						|
            'fbs'         => 'Flatbuffers'           ,
 | 
						|
            'gjs'         => 'Glimmer JavaScript'    ,
 | 
						|
            'gts'         => 'Glimmer TypeScript'    ,
 | 
						|
            'glsl'        => 'GLSL'                  ,
 | 
						|
            'graphqls'    => 'GraphQL'               ,
 | 
						|
            'gql'         => 'GraphQL'               ,
 | 
						|
            'graphql'     => 'GraphQL'               ,
 | 
						|
            'vert'        => 'GLSL'                  ,
 | 
						|
            'tesc'        => 'GLSL'                  ,
 | 
						|
            'tese'        => 'GLSL'                  ,
 | 
						|
            'geom'        => 'GLSL'                  ,
 | 
						|
            'feature'     => 'Cucumber'              ,
 | 
						|
            'frag'        => 'GLSL'                  ,
 | 
						|
            'comp'        => 'GLSL'                  ,
 | 
						|
            'g'           => 'ANTLR Grammar'         ,
 | 
						|
            'g4'          => 'ANTLR Grammar'         ,
 | 
						|
            'gleam'       => 'Gleam'                 ,
 | 
						|
            'go'          => 'Go'                    ,
 | 
						|
            'ʕ◔ϖ◔ʔ'       => 'Go'                    ,
 | 
						|
            'gsp'         => 'Grails'                ,
 | 
						|
            'jenkinsfile' => 'Groovy'                ,
 | 
						|
            'gvy'         => 'Groovy'                ,
 | 
						|
            'gtpl'        => 'Groovy'                ,
 | 
						|
            'grt'         => 'Groovy'                ,
 | 
						|
            'groovy'      => 'Groovy'                ,
 | 
						|
            'gant'        => 'Groovy'                ,
 | 
						|
            'gradle'      => 'Gradle'                ,
 | 
						|
            'gradle.kts'  => 'Gradle'                ,
 | 
						|
            'h'           => 'C/C++ Header'          ,
 | 
						|
            'H'           => 'C/C++ Header'          ,
 | 
						|
            'hh'          => 'C/C++ Header'          ,
 | 
						|
            'hpp'         => 'C/C++ Header'          ,
 | 
						|
            'hxx'         => 'C/C++ Header'          ,
 | 
						|
            'hb'          => 'Harbour'               ,
 | 
						|
            'hrl'         => 'Erlang'                ,
 | 
						|
            'hsc'         => 'Haskell'               ,
 | 
						|
            'hs'          => 'Haskell'               ,
 | 
						|
            'tfvars'      => 'HCL'                   ,
 | 
						|
            'hcl'         => 'HCL'                   ,
 | 
						|
            'tf'          => 'HCL'                   ,
 | 
						|
            'nomad'       => 'HCL'                   ,
 | 
						|
            'hlsli'       => 'HLSL'                  ,
 | 
						|
            'fxh'         => 'HLSL'                  ,
 | 
						|
            'hlsl'        => 'HLSL'                  ,
 | 
						|
            'shader'      => 'HLSL'                  ,
 | 
						|
            'cg'          => 'HLSL'                  ,
 | 
						|
            'cginc'       => 'HLSL'                  ,
 | 
						|
            'haml.deface' => 'Haml'                  ,
 | 
						|
            'haml'        => 'Haml'                  ,
 | 
						|
            'handlebars'  => 'Handlebars'            ,
 | 
						|
            'hbs'         => 'Handlebars'            ,
 | 
						|
            'ha'          => 'Hare'                  ,
 | 
						|
            'hxsl'        => 'Haxe'                  ,
 | 
						|
            'hx'          => 'Haxe'                  ,
 | 
						|
            'HC'          => 'HolyC'                 ,
 | 
						|
            'hoon'        => 'Hoon'                  ,
 | 
						|
            'xht'         => 'HTML'                  ,
 | 
						|
            'html.hl'     => 'HTML'                  ,
 | 
						|
            'htm'         => 'HTML'                  ,
 | 
						|
            'html'        => 'HTML'                  ,
 | 
						|
            'heex'        => 'HTML EEx'              ,
 | 
						|
            'i3'          => 'Modula3'               ,
 | 
						|
            'ice'         => 'Slice'                 ,
 | 
						|
            'icl'         => 'Clean'                 ,
 | 
						|
            'dcl'         => 'Clean'                 ,
 | 
						|
            'dlm'         => 'IDL'                   ,
 | 
						|
            'idl'         => 'IDL'                   ,
 | 
						|
            'idr'         => 'Idris'                 ,
 | 
						|
            'lidr'        => 'Literate Idris'        ,
 | 
						|
            'imba'        => 'Imba'                  ,
 | 
						|
            'prefs'       => 'INI'                   ,
 | 
						|
            'lektorproject'=> 'INI'                  ,
 | 
						|
            'buildozer.spec'=> 'INI'                 ,
 | 
						|
            'ini'         => 'INI'                   ,
 | 
						|
            'editorconfig'=> 'INI'                   ,
 | 
						|
            'ism'         => 'InstallShield'         ,
 | 
						|
            'ipl'         => 'IPL'                   ,
 | 
						|
            'pro'         => 'IDL/Qt Project/Prolog/ProGuard' ,
 | 
						|
            'ig'          => 'Modula3'               ,
 | 
						|
            'il'          => 'SKILL'                 ,
 | 
						|
            'ils'         => 'SKILL++'               ,
 | 
						|
            'inc'         => 'PHP/Pascal/Fortran'    ,
 | 
						|
            'ino'         => 'Arduino Sketch'        ,
 | 
						|
            'ipf'         => 'Igor Pro'              ,
 | 
						|
           #'pde'         => 'Arduino Sketch'        , # pre 1.0
 | 
						|
            'pde'         => 'Processing'            , # pre 1.0
 | 
						|
            'itk'         => 'Tcl/Tk'                ,
 | 
						|
            'java'        => 'Java'                  ,
 | 
						|
            'jcl'         => 'JCL'                   , # IBM Job Control Lang.
 | 
						|
            'jl'          => 'Lisp/Julia'            ,
 | 
						|
            'jai'         => 'Jai'                   ,
 | 
						|
            'janet'       => 'Janet'                 ,
 | 
						|
            'xsjslib'     => 'JavaScript'            ,
 | 
						|
            'xsjs'        => 'JavaScript'            ,
 | 
						|
            'ssjs'        => 'JavaScript'            ,
 | 
						|
            'sjs'         => 'JavaScript'            ,
 | 
						|
            'pac'         => 'JavaScript'            ,
 | 
						|
            'njs'         => 'JavaScript'            ,
 | 
						|
            'mjs'         => 'JavaScript'            ,
 | 
						|
            'cjs'         => 'JavaScript'            ,
 | 
						|
            'jss'         => 'JavaScript'            ,
 | 
						|
            'jsm'         => 'JavaScript'            ,
 | 
						|
            'jsfl'        => 'JavaScript'            ,
 | 
						|
            'jscad'       => 'JavaScript'            ,
 | 
						|
            'jsb'         => 'JavaScript'            ,
 | 
						|
            'jakefile'    => 'JavaScript'            ,
 | 
						|
            'jake'        => 'JavaScript'            ,
 | 
						|
            'bones'       => 'JavaScript'            ,
 | 
						|
            '_js'         => 'JavaScript'            ,
 | 
						|
            'js'          => 'JavaScript'            ,
 | 
						|
            'es6'         => 'JavaScript'            ,
 | 
						|
            'jsf'         => 'JavaServer Faces'      ,
 | 
						|
            'jsx'         => 'JSX'                   ,
 | 
						|
            'xhtml'       => 'XHTML'                 ,
 | 
						|
            'j2'          => 'Jinja Template'        ,
 | 
						|
            'jinja'       => 'Jinja Template'        ,
 | 
						|
            'jinja2'      => 'Jinja Template'        ,
 | 
						|
            'yyp'         => 'JSON'                  ,
 | 
						|
            'webmanifest' => 'JSON'                  ,
 | 
						|
            'webapp'      => 'JSON'                  ,
 | 
						|
            'topojson'    => 'JSON'                  ,
 | 
						|
            'tfstate.backup'=> 'JSON'                  ,
 | 
						|
            'tfstate'     => 'JSON'                  ,
 | 
						|
            'mcmod.info'  => 'JSON'                  ,
 | 
						|
            'mcmeta'      => 'JSON'                  ,
 | 
						|
            'json-tmlanguage'=> 'JSON'                  ,
 | 
						|
            'jsonl'       => 'JSON'                  ,
 | 
						|
            'har'         => 'JSON'                  ,
 | 
						|
            'gltf'        => 'JSON'                  ,
 | 
						|
            'geojson'     => 'JSON'                  ,
 | 
						|
            'composer.lock'=> 'JSON'                  ,
 | 
						|
            'avsc'        => 'JSON'                  ,
 | 
						|
            'watchmanconfig'=> 'JSON'                  ,
 | 
						|
            'tern-project'=> 'JSON'                  ,
 | 
						|
            'tern-config' => 'JSON'                  ,
 | 
						|
            'htmlhintrc'  => 'JSON'                  ,
 | 
						|
            'arcconfig'   => 'JSON'                  ,
 | 
						|
            'json'        => 'JSON'                  ,
 | 
						|
            'json5'       => 'JSON5'                 ,
 | 
						|
            'jsp'         => 'JSP'                   , # Java server pages
 | 
						|
            'jspf'        => 'JSP'                   , # Java server pages
 | 
						|
            'junos'       => 'Juniper Junos'         ,
 | 
						|
            'vm'          => 'Velocity Template Language' ,
 | 
						|
            'kv'          => 'kvlang'                ,
 | 
						|
            'ksc'         => 'Kermit'                ,
 | 
						|
            'ksh'         => 'Korn Shell'            ,
 | 
						|
            'ktm'         => 'Kotlin'                ,
 | 
						|
            'kt'          => 'Kotlin'                ,
 | 
						|
            'kts'         => 'Kotlin'                ,
 | 
						|
            'hlean'       => 'Lean'                  ,
 | 
						|
            'lean'        => 'Lean'                  ,
 | 
						|
            'lhs'         => 'Haskell'               ,
 | 
						|
            'lex'         => 'lex'                   ,
 | 
						|
            'l'           => 'lex'                   ,
 | 
						|
            'ld'          => 'Linker Script'         ,
 | 
						|
            'lem'         => 'Lem'                   ,
 | 
						|
            'less'        => 'LESS'                  ,
 | 
						|
            'lfe'         => 'LFE'                   ,
 | 
						|
            'liquid'      => 'liquid'                ,
 | 
						|
            'lsp'         => 'Lisp'                  ,
 | 
						|
            'lisp'        => 'Lisp'                  ,
 | 
						|
            'll'          => 'LLVM IR'               ,
 | 
						|
            'lgt'         => 'Logtalk'               ,
 | 
						|
            'logtalk'     => 'Logtalk'               ,
 | 
						|
            'lp'          => 'AnsProlog'             ,  # Answer Set Programming / clingo
 | 
						|
            'wlua'        => 'Lua'                   ,
 | 
						|
            'rbxs'        => 'Lua'                   ,
 | 
						|
            'pd_lua'      => 'Lua'                   ,
 | 
						|
            'p8'          => 'Lua'                   ,
 | 
						|
            'nse'         => 'Lua'                   ,
 | 
						|
            'lua'         => 'Lua'                   ,
 | 
						|
            'luau'        => 'Luau'                  ,
 | 
						|
            'm3'          => 'Modula3'               ,
 | 
						|
            'm4'          => 'm4'                    ,
 | 
						|
            'makefile'    => 'make'                  ,
 | 
						|
            'Makefile'    => 'make'                  ,
 | 
						|
            'mao'         => 'Mako'                  ,
 | 
						|
            'mako'        => 'Mako'                  ,
 | 
						|
            'workbook'    => 'Markdown'              ,
 | 
						|
            'ronn'        => 'Markdown'              ,
 | 
						|
            'mkdown'      => 'Markdown'              ,
 | 
						|
            'mkdn'        => 'Markdown'              ,
 | 
						|
            'mkd'         => 'Markdown'              ,
 | 
						|
            'mdx'         => 'Markdown'              ,
 | 
						|
            'mdwn'        => 'Markdown'              ,
 | 
						|
            'mdown'       => 'Markdown'              ,
 | 
						|
            'markdown'    => 'Markdown'              ,
 | 
						|
            'contents.lr' => 'Markdown'              ,
 | 
						|
            'md'          => 'Markdown'              ,
 | 
						|
            'mc'          => 'Windows Message File'  ,
 | 
						|
            'met'         => 'Teamcenter met'        ,
 | 
						|
            'mg'          => 'Modula3'               ,
 | 
						|
            'mojom'       => 'Mojom'                 ,
 | 
						|
            'mojo'        => 'Mojo'                  ,
 | 
						|
            '🔥'          => 'Mojo'                  ,
 | 
						|
            'meson.build' => 'Meson'                 ,
 | 
						|
            'metal'       => 'Metal'                 ,
 | 
						|
            'mk'          => 'make'                  ,
 | 
						|
#           'mli'         => 'ML'                    , # ML not implemented
 | 
						|
#           'ml'          => 'ML'                    ,
 | 
						|
            'ml4'         => 'OCaml'                 ,
 | 
						|
            'eliomi'      => 'OCaml'                 ,
 | 
						|
            'eliom'       => 'OCaml'                 ,
 | 
						|
            'ml'          => 'OCaml'                 ,
 | 
						|
            'mli'         => 'OCaml'                 ,
 | 
						|
            'mly'         => 'OCaml'                 ,
 | 
						|
            'mll'         => 'OCaml'                 ,
 | 
						|
            'm'           => 'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' ,
 | 
						|
            'mm'          => 'Objective-C++'         ,
 | 
						|
            'msg'         => 'Gencat NLS'            ,
 | 
						|
            'nbp'         => 'Mathematica'           ,
 | 
						|
            'mathematica' => 'Mathematica'           ,
 | 
						|
            'ma'          => 'Mathematica'           ,
 | 
						|
            'cdf'         => 'Mathematica'           ,
 | 
						|
            'mt'          => 'Mathematica'           ,
 | 
						|
            'wl'          => 'Mathematica'           ,
 | 
						|
            'wlt'         => 'Mathematica'           ,
 | 
						|
            'mo'          => 'Modelica'              ,
 | 
						|
            'mustache'    => 'Mustache'              ,
 | 
						|
            'wdproj'      => 'MSBuild script'        ,
 | 
						|
            'csproj'      => 'MSBuild script'        ,
 | 
						|
            'vcproj'      => 'MSBuild script'        ,
 | 
						|
            'wixproj'     => 'MSBuild script'        ,
 | 
						|
            'btproj'      => 'MSBuild script'        ,
 | 
						|
            'msbuild'     => 'MSBuild script'        ,
 | 
						|
            'sln'         => 'Visual Studio Solution',
 | 
						|
            'mps'         => 'MUMPS'                 ,
 | 
						|
            'mth'         => 'Teamcenter mth'        ,
 | 
						|
            'n'           => 'Nemerle'               ,
 | 
						|
            'nlogo'       => 'NetLogo'               ,
 | 
						|
            'nls'         => 'NetLogo'               ,
 | 
						|
            'ncl'         => 'Nickel'                ,
 | 
						|
            'nims'        => 'Nim'                   ,
 | 
						|
            'nimrod'      => 'Nim'                   ,
 | 
						|
            'nimble'      => 'Nim'                   ,
 | 
						|
            'nim.cfg'     => 'Nim'                   ,
 | 
						|
            'nim'         => 'Nim'                   ,
 | 
						|
            'nix'         => 'Nix'                   ,
 | 
						|
            'nut'         => 'Squirrel'              ,
 | 
						|
            'njk'         => 'Nunjucks'              ,
 | 
						|
            'odin'        => 'Odin'                  ,
 | 
						|
            'oscript'     => 'LiveLink OScript'      ,
 | 
						|
            'bod'         => 'Oracle PL/SQL'         ,
 | 
						|
            'spc'         => 'Oracle PL/SQL'         ,
 | 
						|
            'fnc'         => 'Oracle PL/SQL'         ,
 | 
						|
            'prc'         => 'Oracle PL/SQL'         ,
 | 
						|
            'trg'         => 'Oracle PL/SQL'         ,
 | 
						|
            'pad'         => 'Ada'                   , # Oracle Ada preprocessor
 | 
						|
            'page'        => 'Visualforce Page'      ,
 | 
						|
            'pas'         => 'Pascal'                ,
 | 
						|
            'pcc'         => 'C++'                   , # Oracle C++ preprocessor
 | 
						|
            'rexfile'     => 'Perl'                  ,
 | 
						|
            'psgi'        => 'Perl'                  ,
 | 
						|
            'ph'          => 'Perl'                  ,
 | 
						|
            'makefile.pl' => 'Perl'                  ,
 | 
						|
            'cpanfile'    => 'Perl'                  ,
 | 
						|
            'al'          => 'Perl'                  ,
 | 
						|
            'ack'         => 'Perl'                  ,
 | 
						|
            'perl'        => 'Perl'                  ,
 | 
						|
            'pfo'         => 'Fortran 77'            ,
 | 
						|
            'pgc'         => 'C'                     , # Postgres embedded C/C++
 | 
						|
            'phpt'        => 'PHP'                   ,
 | 
						|
            'phps'        => 'PHP'                   ,
 | 
						|
            'phakefile'   => 'PHP'                   ,
 | 
						|
            'ctp'         => 'PHP'                   ,
 | 
						|
            'aw'          => 'PHP'                   ,
 | 
						|
            'php_cs.dist' => 'PHP'                   ,
 | 
						|
            'php_cs'      => 'PHP'                   ,
 | 
						|
            'php3'        => 'PHP'                   ,
 | 
						|
            'php4'        => 'PHP'                   ,
 | 
						|
            'php5'        => 'PHP'                   ,
 | 
						|
            'php'         => 'PHP'                   ,
 | 
						|
            'phtml'       => 'PHP'                   ,
 | 
						|
            'pig'         => 'Pig Latin'             ,
 | 
						|
            'plh'         => 'Perl'                  ,
 | 
						|
            'pl'          => 'Perl/Prolog'           ,
 | 
						|
            'PL'          => 'Perl/Prolog'           ,
 | 
						|
            'p6'          => 'Raku/Prolog'           ,
 | 
						|
            'P6'          => 'Raku/Prolog'           ,
 | 
						|
            'plx'         => 'Perl'                  ,
 | 
						|
            'pm'          => 'Perl'                  ,
 | 
						|
            'pm6'         => 'Raku'                  ,
 | 
						|
            'raku'        => 'Raku'                  ,
 | 
						|
            'rakumod'     => 'Raku'                  ,
 | 
						|
            'pom.xml'     => 'Maven'                 ,
 | 
						|
            'pom'         => 'Maven'                 ,
 | 
						|
            'scad'        => 'OpenSCAD'              ,
 | 
						|
            'yap'         => 'Prolog'                ,
 | 
						|
            'prolog'      => 'Prolog'                ,
 | 
						|
            'P'           => 'Prolog'                ,
 | 
						|
            'p'           => 'Pascal'                ,
 | 
						|
            'pp'          => 'Pascal/Puppet'         ,
 | 
						|
            'viw'         => 'SQL'                   ,
 | 
						|
            'udf'         => 'SQL'                   ,
 | 
						|
            'tab'         => 'SQL'                   ,
 | 
						|
            'mysql'       => 'SQL'                   ,
 | 
						|
            'cql'         => 'SQL'                   ,
 | 
						|
            'psql'        => 'SQL'                   ,
 | 
						|
            'xpy'         => 'Python'                ,
 | 
						|
            'wsgi'        => 'Python'                ,
 | 
						|
            'wscript'     => 'Python'                ,
 | 
						|
            'workspace'   => 'Python'                ,
 | 
						|
            'tac'         => 'Python'                ,
 | 
						|
            'snakefile'   => 'Python'                ,
 | 
						|
            'sconstruct'  => 'Python'                ,
 | 
						|
            'sconscript'  => 'Python'                ,
 | 
						|
            'pyt'         => 'Python'                ,
 | 
						|
            'pyp'         => 'Python'                ,
 | 
						|
            'pyi'         => 'Python'                ,
 | 
						|
            'pyde'        => 'Python'                ,
 | 
						|
            'py3'         => 'Python'                ,
 | 
						|
            'lmi'         => 'Python'                ,
 | 
						|
            'gypi'        => 'Python'                ,
 | 
						|
            'gyp'         => 'Python'                ,
 | 
						|
            'build.bazel' => 'Python'                ,
 | 
						|
            'buck'        => 'Python'                ,
 | 
						|
            'gclient'     => 'Python'                ,
 | 
						|
            'py'          => 'Python'                ,
 | 
						|
            'pyw'         => 'Python'                ,
 | 
						|
            'ipynb'       => 'Jupyter Notebook'      ,
 | 
						|
            'pyj'         => 'RapydScript'           ,
 | 
						|
            'pxi'         => 'Cython'                ,
 | 
						|
            'pxd'         => 'Cython'                ,
 | 
						|
            'pyx'         => 'Cython'                ,
 | 
						|
            'qbs'         => 'QML'                   ,
 | 
						|
            'qml'         => 'QML'                   ,
 | 
						|
            'watchr'      => 'Ruby'                  ,
 | 
						|
            'vagrantfile' => 'Ruby'                  ,
 | 
						|
            'thorfile'    => 'Ruby'                  ,
 | 
						|
            'thor'        => 'Ruby'                  ,
 | 
						|
            'snapfile'    => 'Ruby'                  ,
 | 
						|
            'ru'          => 'Ruby'                  ,
 | 
						|
            'rbx'         => 'Ruby'                  ,
 | 
						|
            'rbw'         => 'Ruby'                  ,
 | 
						|
            'rbuild'      => 'Ruby'                  ,
 | 
						|
            'rabl'        => 'Ruby'                  ,
 | 
						|
            'puppetfile'  => 'Ruby'                  ,
 | 
						|
            'podfile'     => 'Ruby'                  ,
 | 
						|
            'mspec'       => 'Ruby'                  ,
 | 
						|
            'mavenfile'   => 'Ruby'                  ,
 | 
						|
            'jbuilder'    => 'Ruby'                  ,
 | 
						|
            'jarfile'     => 'Ruby'                  ,
 | 
						|
            'guardfile'   => 'Ruby'                  ,
 | 
						|
            'god'         => 'Ruby'                  ,
 | 
						|
            'gemspec'     => 'Ruby'                  ,
 | 
						|
            'gemfile.lock'=> 'Ruby'                  ,
 | 
						|
            'gemfile'     => 'Ruby'                  ,
 | 
						|
            'fastfile'    => 'Ruby'                  ,
 | 
						|
            'eye'         => 'Ruby'                  ,
 | 
						|
            'deliverfile' => 'Ruby'                  ,
 | 
						|
            'dangerfile'  => 'Ruby'                  ,
 | 
						|
            'capfile'     => 'Ruby'                  ,
 | 
						|
            'buildfile'   => 'Ruby'                  ,
 | 
						|
            'builder'     => 'Ruby'                  ,
 | 
						|
            'brewfile'    => 'Ruby'                  ,
 | 
						|
            'berksfile'   => 'Ruby'                  ,
 | 
						|
            'appraisals'  => 'Ruby'                  ,
 | 
						|
            'pryrc'       => 'Ruby'                  ,
 | 
						|
            'irbrc'       => 'Ruby'                  ,
 | 
						|
            'rb'          => 'Ruby'                  ,
 | 
						|
            'podspec'     => 'Ruby'                  ,
 | 
						|
            'rake'        => 'Ruby'                  ,
 | 
						|
         #  'resx'        => 'ASP.NET'               ,
 | 
						|
            'rex'         => 'Oracle Reports'        ,
 | 
						|
            'pprx'        => 'Rexx'                  ,
 | 
						|
            'rexx'        => 'Rexx'                  ,
 | 
						|
            'rhtml'       => 'Ruby HTML'             ,
 | 
						|
            'circom'      => 'Circom'                ,
 | 
						|
            'cairo'       => 'Cairo'                 ,
 | 
						|
            'rs.in'       => 'Rust'                  ,
 | 
						|
            'rs'          => 'Rust'                  ,
 | 
						|
            'rst.txt'     => 'reStructuredText'      ,
 | 
						|
            'rest.txt'    => 'reStructuredText'      ,
 | 
						|
            'rest'        => 'reStructuredText'      ,
 | 
						|
            'rst'         => 'reStructuredText'      ,
 | 
						|
            's'           => 'Assembly'              ,
 | 
						|
            'S'           => 'Assembly'              ,
 | 
						|
            'SCA'         => 'Visual Fox Pro'        ,
 | 
						|
            'sca'         => 'Visual Fox Pro'        ,
 | 
						|
            'sbt'         => 'Scala'                 ,
 | 
						|
            'kojo'        => 'Scala'                 ,
 | 
						|
            'scala'       => 'Scala'                 ,
 | 
						|
            'sbl'         => 'Softbridge Basic'      ,
 | 
						|
            'SBL'         => 'Softbridge Basic'      ,
 | 
						|
            'sed'         => 'sed'                   ,
 | 
						|
            'sp'          => 'SparForte'             ,
 | 
						|
            'sol'         => 'Solidity'              ,
 | 
						|
            'p4'          => 'P4'                    ,
 | 
						|
            'ses'         => 'Patran Command Language'   ,
 | 
						|
            'pcl'         => 'Patran Command Language'   ,
 | 
						|
            'peg'         => 'PEG'                   ,
 | 
						|
            'pegjs'       => 'peg.js'                ,
 | 
						|
            'peggy'       => 'peggy'                 ,
 | 
						|
            'pest'        => 'Pest'                  ,
 | 
						|
            'prisma'      => 'Prisma Schema'         ,
 | 
						|
            'tspeg'       => 'tspeg'                 ,
 | 
						|
            'jspeg'       => 'tspeg'                 ,
 | 
						|
            'pl1'         => 'PL/I'                  ,
 | 
						|
            'plm'         => 'PL/M'                  ,
 | 
						|
            'lit'         => 'PL/M'                  ,
 | 
						|
            'iuml'        => 'PlantUML'              ,
 | 
						|
            'pu'          => 'PlantUML'              ,
 | 
						|
            'puml'        => 'PlantUML'              ,
 | 
						|
            'plantuml'    => 'PlantUML'              ,
 | 
						|
            'wsd'         => 'PlantUML'              ,
 | 
						|
            'properties'  => 'Properties'            ,
 | 
						|
            'po'          => 'PO File'               ,
 | 
						|
            'pony'        => 'Pony'                  ,
 | 
						|
            'pbt'         => 'PowerBuilder'          ,
 | 
						|
            'sra'         => 'PowerBuilder'          ,
 | 
						|
            'srf'         => 'PowerBuilder'          ,
 | 
						|
            'srm'         => 'PowerBuilder'          ,
 | 
						|
            'srs'         => 'PowerBuilder'          ,
 | 
						|
            'sru'         => 'PowerBuilder'          ,
 | 
						|
            'srw'         => 'PowerBuilder'          ,
 | 
						|
            'jade'        => 'Pug'                   ,
 | 
						|
            'pug'         => 'Pug'                   ,
 | 
						|
            'purs'        => 'PureScript'            ,
 | 
						|
            'prefab'      => 'Unity-Prefab'          ,
 | 
						|
            'proto'       => 'Protocol Buffers'      ,
 | 
						|
            'mat'         => 'Unity-Prefab'          ,
 | 
						|
            'ps1'         => 'PowerShell'            ,
 | 
						|
            'psd1'        => 'PowerShell'            ,
 | 
						|
            'psm1'        => 'PowerShell'            ,
 | 
						|
            'prql'        => 'PRQL'                  ,
 | 
						|
            'rsx'         => 'R'                     ,
 | 
						|
            'rd'          => 'R'                     ,
 | 
						|
            'expr-dist'   => 'R'                     ,
 | 
						|
            'rprofile'    => 'R'                     ,
 | 
						|
            'R'           => 'R'                     ,
 | 
						|
            'r'           => 'R'                     ,
 | 
						|
            'raml'        => 'RAML'                  ,
 | 
						|
            'ring'        => 'Ring'                  ,
 | 
						|
            'rh'          => 'Ring'                  ,
 | 
						|
            'rform'       => 'Ring'                  ,
 | 
						|
            'rktd'        => 'Racket'                ,
 | 
						|
            'rkt'         => 'Racket'                ,
 | 
						|
            'rktl'        => 'Racket'                ,
 | 
						|
            'Rmd'         => 'Rmd'                   ,
 | 
						|
            're'          => 'ReasonML'              ,
 | 
						|
            'rei'         => 'ReasonML'              ,
 | 
						|
            'res'         => 'ReScript'              ,
 | 
						|
            'resi'        => 'ReScript'              ,
 | 
						|
            'scrbl'       => 'Racket'                ,
 | 
						|
            'sps'         => 'Scheme'                ,
 | 
						|
            'sc'          => 'Scheme'                ,
 | 
						|
            'ss'          => 'Scheme'                ,
 | 
						|
            'scm'         => 'Scheme'                ,
 | 
						|
            'sch'         => 'Scheme'                ,
 | 
						|
            'sls'         => 'Scheme/SaltStack'      ,
 | 
						|
            'sld'         => 'Scheme'                ,
 | 
						|
            'robot'       => 'RobotFramework'        ,
 | 
						|
            'rc'          => 'Windows Resource File' ,
 | 
						|
            'rc2'         => 'Windows Resource File' ,
 | 
						|
            'sas'         => 'SAS'                   ,
 | 
						|
            'sass'        => 'Sass'                  ,
 | 
						|
            'scss'        => 'SCSS'                  ,
 | 
						|
            'sh'          => 'Bourne Shell'          ,
 | 
						|
            'smarty'      => 'Smarty'                ,
 | 
						|
            'sml'         => 'Standard ML'           ,
 | 
						|
            'sig'         => 'Standard ML'           ,
 | 
						|
            'fun'         => 'Standard ML'           ,
 | 
						|
            'slim'        => 'Slim'                  ,
 | 
						|
            'e'           => 'Specman e'             ,
 | 
						|
            'sql'         => 'SQL'                   ,
 | 
						|
            'SQL'         => 'SQL'                   ,
 | 
						|
            'sproc.sql'   => 'SQL Stored Procedure'  ,
 | 
						|
            'spoc.sql'    => 'SQL Stored Procedure'  ,
 | 
						|
            'spc.sql'     => 'SQL Stored Procedure'  ,
 | 
						|
            'udf.sql'     => 'SQL Stored Procedure'  ,
 | 
						|
            'data.sql'    => 'SQL Data'              ,
 | 
						|
            'sss'         => 'SugarSS'               ,
 | 
						|
            'slint'       => 'Slint'                 ,
 | 
						|
            'st'          => 'Smalltalk'             ,
 | 
						|
            'rules'       => 'Snakemake'             ,
 | 
						|
            'smk'         => 'Snakemake'             ,
 | 
						|
            'styl'        => 'Stylus'                ,
 | 
						|
            'i'           => 'SWIG'                  ,
 | 
						|
            'svelte'      => 'Svelte'                ,
 | 
						|
            'sv'          => 'Verilog-SystemVerilog' ,
 | 
						|
            'svh'         => 'Verilog-SystemVerilog' ,
 | 
						|
            'svg'         => 'SVG'                   ,
 | 
						|
            'SVG'         => 'SVG'                   ,
 | 
						|
            'v'           => 'Verilog-SystemVerilog/Coq' ,
 | 
						|
            'td'          => 'TableGen'              ,
 | 
						|
            'tcl'         => 'Tcl/Tk'                ,
 | 
						|
            'tcsh'        => 'C Shell'               ,
 | 
						|
            'tk'          => 'Tcl/Tk'                ,
 | 
						|
            'teal'        => 'TEAL'                  ,
 | 
						|
            'mkvi'        => 'TeX'                   ,
 | 
						|
            'mkiv'        => 'TeX'                   ,
 | 
						|
            'mkii'        => 'TeX'                   ,
 | 
						|
            'ltx'         => 'TeX'                   ,
 | 
						|
            'lbx'         => 'TeX'                   ,
 | 
						|
            'ins'         => 'TeX'                   ,
 | 
						|
            'cbx'         => 'TeX'                   ,
 | 
						|
            'bib'         => 'TeX'                   ,
 | 
						|
            'bbx'         => 'TeX'                   ,
 | 
						|
            'aux'         => 'TeX'                   ,
 | 
						|
            'tex'         => 'TeX'                   , # TeX, LaTex, MikTex, ..
 | 
						|
            'toml'        => 'TOML'                  ,
 | 
						|
            'sty'         => 'TeX'                   ,
 | 
						|
#           'cls'         => 'TeX'                   ,
 | 
						|
            'dtx'         => 'TeX'                   ,
 | 
						|
            'bst'         => 'TeX'                   ,
 | 
						|
            'txt'         => 'Text'                  ,
 | 
						|
            'text'        => 'Text'                  ,
 | 
						|
            'tres'        => 'Godot Resource'        ,
 | 
						|
            'tscn'        => 'Godot Scene'           ,
 | 
						|
            'thrift'      => 'Thrift'                ,
 | 
						|
            'tla'         => 'TLA+'                  ,
 | 
						|
            'tpl'         => 'Smarty'                ,
 | 
						|
            'trigger'     => 'Apex Trigger'          ,
 | 
						|
            'ttcn'        => 'TTCN'                  ,
 | 
						|
            'ttcn2'       => 'TTCN'                  ,
 | 
						|
            'ttcn3'       => 'TTCN'                  ,
 | 
						|
            'ttcnpp'      => 'TTCN'                  ,
 | 
						|
            'sdl'         => 'TNSDL'                 ,
 | 
						|
            'ssc'         => 'TNSDL'                 ,
 | 
						|
            'sdt'         => 'TNSDL'                 ,
 | 
						|
            'spd'         => 'TNSDL'                 ,
 | 
						|
            'sst'         => 'TNSDL'                 ,
 | 
						|
            'rou'         => 'TNSDL'                 ,
 | 
						|
            'cin'         => 'TNSDL'                 ,
 | 
						|
            'cii'         => 'TNSDL'                 ,
 | 
						|
            'interface'   => 'TNSDL'                 ,
 | 
						|
            'in1'         => 'TNSDL'                 ,
 | 
						|
            'in2'         => 'TNSDL'                 ,
 | 
						|
            'in3'         => 'TNSDL'                 ,
 | 
						|
            'in4'         => 'TNSDL'                 ,
 | 
						|
            'inf'         => 'TNSDL'                 ,
 | 
						|
            'tpd'         => 'TITAN Project File Information',
 | 
						|
            'ts'          => 'TypeScript/Qt Linguist',
 | 
						|
            'mts'         => 'TypeScript'            ,
 | 
						|
            'tsx'         => 'TypeScript'            ,
 | 
						|
            'tss'         => 'Titanium Style Sheet'  ,
 | 
						|
            'twig'        => 'Twig'                  ,
 | 
						|
            'typ'         => 'Typst'                 ,
 | 
						|
            'um'          => 'Umka'                  ,
 | 
						|
            'ui'          => 'Qt/Glade'              ,
 | 
						|
            'glade'       => 'Glade'                 ,
 | 
						|
            'vala'        => 'Vala'                  ,
 | 
						|
            'vapi'        => 'Vala Header'           ,
 | 
						|
            'vhw'         => 'VHDL'                  ,
 | 
						|
            'vht'         => 'VHDL'                  ,
 | 
						|
            'vhs'         => 'VHDL'                  ,
 | 
						|
            'vho'         => 'VHDL'                  ,
 | 
						|
            'vhi'         => 'VHDL'                  ,
 | 
						|
            'vhf'         => 'VHDL'                  ,
 | 
						|
            'vhd'         => 'VHDL'                  ,
 | 
						|
            'VHD'         => 'VHDL'                  ,
 | 
						|
            'vhdl'        => 'VHDL'                  ,
 | 
						|
            'VHDL'        => 'VHDL'                  ,
 | 
						|
            'bas'         => 'Visual Basic'          ,
 | 
						|
            'BAS'         => 'Visual Basic'          ,
 | 
						|
            'ctl'         => 'Visual Basic'          ,
 | 
						|
            'dsr'         => 'Visual Basic'          ,
 | 
						|
            'frm'         => 'Visual Basic'          ,
 | 
						|
            'frx'         => 'Visual Basic'          ,
 | 
						|
            'FRX'         => 'Visual Basic'          ,
 | 
						|
            'vba'         => 'VB for Applications'   ,
 | 
						|
            'VBA'         => 'VB for Applications'   ,
 | 
						|
            'vbhtml'      => 'Visual Basic'          ,
 | 
						|
            'VBHTML'      => 'Visual Basic'          ,
 | 
						|
            'vbproj'      => 'Visual Basic .NET'     ,
 | 
						|
            'vbp'         => 'Visual Basic'          , # .vbp - autogenerated
 | 
						|
            'vbs'         => 'Visual Basic Script'   ,
 | 
						|
            'VBS'         => 'Visual Basic Script'   ,
 | 
						|
            'vb'          => 'Visual Basic .NET'     ,
 | 
						|
            'VB'          => 'Visual Basic .NET'     ,
 | 
						|
            'vbw'         => 'Visual Basic'          , # .vbw - autogenerated
 | 
						|
            'vue'         => 'Vuejs Component'       ,
 | 
						|
            'vy'          => 'Vyper'                 ,
 | 
						|
            'webinfo'     => 'ASP.NET'               ,
 | 
						|
            'wsdl'        => 'Web Services Description',
 | 
						|
            'x'           => 'Logos'                 ,
 | 
						|
            'xm'          => 'Logos'                 ,
 | 
						|
            'xpo'         => 'X++'                   , # Microsoft Dynamics AX 4.0 export format
 | 
						|
            'xmi'         => 'XMI'                   ,
 | 
						|
            'XMI'         => 'XMI'                   ,
 | 
						|
            'zcml'        => 'XML'                   ,
 | 
						|
            'xul'         => 'XML'                   ,
 | 
						|
            'xspec'       => 'XML'                   ,
 | 
						|
            'xproj'       => 'XML'                   ,
 | 
						|
            'xml.dist'    => 'XML'                   ,
 | 
						|
            'xliff'       => 'XML'                   ,
 | 
						|
            'xlf'         => 'XML'                   ,
 | 
						|
            'xib'         => 'XML'                   ,
 | 
						|
            'xacro'       => 'XML'                   ,
 | 
						|
            'x3d'         => 'XML'                   ,
 | 
						|
            'wsf'         => 'XML'                   ,
 | 
						|
            'web.release.config'=> 'XML'             ,
 | 
						|
            'web.debug.config'=> 'XML'               ,
 | 
						|
            'web.config'  => 'XML'                   ,
 | 
						|
            'wxml'        => 'WXML'                  ,
 | 
						|
            'wxss'        => 'WXSS'                  ,
 | 
						|
            'vxml'        => 'XML'                   ,
 | 
						|
            'vstemplate'  => 'XML'                   ,
 | 
						|
            'vssettings'  => 'XML'                   ,
 | 
						|
            'vsixmanifest'=> 'XML'                   ,
 | 
						|
            'vcxproj'     => 'XML'                   ,
 | 
						|
            'ux'          => 'XML'                   ,
 | 
						|
            'urdf'        => 'XML'                   ,
 | 
						|
            'tmtheme'     => 'XML'                   ,
 | 
						|
            'tmsnippet'   => 'XML'                   ,
 | 
						|
            'tmpreferences'=> 'XML'                  ,
 | 
						|
            'tmlanguage'  => 'XML'                   ,
 | 
						|
            'tml'         => 'XML'                   ,
 | 
						|
            'tmcommand'   => 'XML'                   ,
 | 
						|
            'targets'     => 'XML'                   ,
 | 
						|
            'sublime-snippet'=> 'XML'                   ,
 | 
						|
            'sttheme'     => 'XML'                   ,
 | 
						|
            'storyboard'  => 'XML'                   ,
 | 
						|
            'srdf'        => 'XML'                   ,
 | 
						|
            'shproj'      => 'XML'                   ,
 | 
						|
            'sfproj'      => 'XML'                   ,
 | 
						|
            'settings.stylecop'=> 'XML'                   ,
 | 
						|
            'scxml'       => 'XML'                   ,
 | 
						|
            'rss'         => 'XML'                   ,
 | 
						|
            'resx'        => 'XML'                   ,
 | 
						|
            'rdf'         => 'XML'                   ,
 | 
						|
            'pt'          => 'XML'                   ,
 | 
						|
            'psc1'        => 'XML'                   ,
 | 
						|
            'ps1xml'      => 'XML'                   ,
 | 
						|
            'props'       => 'XML'                   ,
 | 
						|
            'proj'        => 'XML'                   ,
 | 
						|
            'plist'       => 'XML'                   ,
 | 
						|
            'pkgproj'     => 'XML'                   ,
 | 
						|
            'packages.config'=> 'XML'                   ,
 | 
						|
            'osm'         => 'XML'                   ,
 | 
						|
            'odd'         => 'XML'                   ,
 | 
						|
            'nuspec'      => 'XML'                   ,
 | 
						|
            'nuget.config'=> 'XML'                   ,
 | 
						|
            'nproj'       => 'XML'                   ,
 | 
						|
            'ndproj'      => 'XML'                   ,
 | 
						|
            'natvis'      => 'XML'                   ,
 | 
						|
            'mjml'        => 'XML'                   ,
 | 
						|
            'mdpolicy'    => 'XML'                   ,
 | 
						|
            'launch'      => 'XML'                   ,
 | 
						|
            'kml'         => 'XML'                   ,
 | 
						|
            'jsproj'      => 'XML'                   ,
 | 
						|
            'jelly'       => 'XML'                   ,
 | 
						|
            'ivy'         => 'XML'                   ,
 | 
						|
            'iml'         => 'XML'                   ,
 | 
						|
            'grxml'       => 'XML'                   ,
 | 
						|
            'gmx'         => 'XML'                   ,
 | 
						|
            'fsproj'      => 'XML'                   ,
 | 
						|
            'filters'     => 'XML'                   ,
 | 
						|
            'dotsettings' => 'XML'                   ,
 | 
						|
            'dll.config'  => 'XML'                   ,
 | 
						|
            'ditaval'     => 'XML'                   ,
 | 
						|
            'ditamap'     => 'XML'                   ,
 | 
						|
            'depproj'     => 'XML'                   ,
 | 
						|
            'ct'          => 'XML'                   ,
 | 
						|
            'csl'         => 'XML'                   ,
 | 
						|
            'csdef'       => 'XML'                   ,
 | 
						|
            'cscfg'       => 'XML'                   ,
 | 
						|
            'cproject'    => 'XML'                   ,
 | 
						|
            'clixml'      => 'XML'                   ,
 | 
						|
            'ccxml'       => 'XML'                   ,
 | 
						|
            'ccproj'      => 'XML'                   ,
 | 
						|
            'builds'      => 'XML'                   ,
 | 
						|
            'axml'        => 'XML'                   ,
 | 
						|
            'app.config'  => 'XML'                   ,
 | 
						|
            'ant'         => 'XML'                   ,
 | 
						|
            'admx'        => 'XML'                   ,
 | 
						|
            'adml'        => 'XML'                   ,
 | 
						|
            'project'     => 'XML'                   ,
 | 
						|
            'classpath'   => 'XML'                   ,
 | 
						|
            'xml'         => 'XML'                   ,
 | 
						|
            'XML'         => 'XML'                   ,
 | 
						|
            'mxml'        => 'MXML'                  ,
 | 
						|
            'xml.builder' => 'builder'               ,
 | 
						|
            'build'       => 'NAnt script'           ,
 | 
						|
            'vim'         => 'vim script'            ,
 | 
						|
            'swift'       => 'Swift'                 ,
 | 
						|
            'xaml'        => 'XAML'                  ,
 | 
						|
            'wast'        => 'WebAssembly'           ,
 | 
						|
            'wat'         => 'WebAssembly'           ,
 | 
						|
            'wgsl'        => 'WGSL'                  ,
 | 
						|
            'wxs'         => 'WiX source'            ,
 | 
						|
            'wxi'         => 'WiX include'           ,
 | 
						|
            'wxl'         => 'WiX string localization' ,
 | 
						|
            'prw'         => 'xBase'                 ,
 | 
						|
            'prg'         => 'xBase'                 ,
 | 
						|
            'ch'          => 'xBase Header'          ,
 | 
						|
            'xqy'         => 'XQuery'                ,
 | 
						|
            'xqm'         => 'XQuery'                ,
 | 
						|
            'xql'         => 'XQuery'                ,
 | 
						|
            'xq'          => 'XQuery'                ,
 | 
						|
            'xquery'      => 'XQuery'                ,
 | 
						|
            'xsd'         => 'XSD'                   ,
 | 
						|
            'XSD'         => 'XSD'                   ,
 | 
						|
            'xslt'        => 'XSLT'                  ,
 | 
						|
            'XSLT'        => 'XSLT'                  ,
 | 
						|
            'xsl'         => 'XSLT'                  ,
 | 
						|
            'XSL'         => 'XSLT'                  ,
 | 
						|
            'xtend'       => 'Xtend'                 ,
 | 
						|
            'yacc'        => 'yacc'                  ,
 | 
						|
            'y'           => 'yacc'                  ,
 | 
						|
            'yml.mysql'   => 'YAML'                  ,
 | 
						|
            'yaml-tmlanguage'=> 'YAML'                  ,
 | 
						|
            'syntax'      => 'YAML'                  ,
 | 
						|
            'sublime-syntax'=> 'YAML'                  ,
 | 
						|
            'rviz'        => 'YAML'                  ,
 | 
						|
            'reek'        => 'YAML'                  ,
 | 
						|
            'mir'         => 'YAML'                  ,
 | 
						|
            'glide.lock'  => 'YAML'                  ,
 | 
						|
            'gemrc'       => 'YAML'                  ,
 | 
						|
            'clang-tidy'  => 'YAML'                  ,
 | 
						|
            'clang-format'=> 'YAML'                  ,
 | 
						|
            'yaml'        => 'YAML'                  ,
 | 
						|
            'yml'         => 'YAML'                  ,
 | 
						|
            'zig'         => 'Zig'                   ,
 | 
						|
            'zsh'         => 'zsh'                   ,
 | 
						|
            );
 | 
						|
# 1}}}
 | 
						|
%{$rh_Language_by_Script}    = (             # {{{1
 | 
						|
            'awk'      => 'awk'                   ,
 | 
						|
            'bash'     => 'Bourne Again Shell'    ,
 | 
						|
            'bc'       => 'bc'                    ,# calculator
 | 
						|
            'crystal'  => 'Crystal'               ,
 | 
						|
            'csh'      => 'C Shell'               ,
 | 
						|
            'dmd'      => 'D'                     ,
 | 
						|
            'dtrace'   => 'dtrace'                ,
 | 
						|
            'escript'  => 'Erlang'                ,
 | 
						|
            'groovy'   => 'Groovy'                ,
 | 
						|
            'idl'      => 'IDL'                   ,
 | 
						|
            'kermit'   => 'Kermit'                ,
 | 
						|
            'ksh'      => 'Korn Shell'            ,
 | 
						|
            'lua'      => 'Lua'                   ,
 | 
						|
            'luau'     => 'Luau'                  ,
 | 
						|
            'make'     => 'make'                  ,
 | 
						|
            'octave'   => 'Octave'                ,
 | 
						|
            'perl5'    => 'Perl'                  ,
 | 
						|
            'perl'     => 'Perl'                  ,
 | 
						|
            'miniperl' => 'Perl'                  ,
 | 
						|
            'php'      => 'PHP'                   ,
 | 
						|
            'php5'     => 'PHP'                   ,
 | 
						|
            'python'   => 'Python'                ,
 | 
						|
            'python2.6'=> 'Python'                ,
 | 
						|
            'python2.7'=> 'Python'                ,
 | 
						|
            'python3'  => 'Python'                ,
 | 
						|
            'python3.3'=> 'Python'                ,
 | 
						|
            'python3.4'=> 'Python'                ,
 | 
						|
            'python3.5'=> 'Python'                ,
 | 
						|
            'python3.6'=> 'Python'                ,
 | 
						|
            'python3.7'=> 'Python'                ,
 | 
						|
            'python3.8'=> 'Python'                ,
 | 
						|
            'perl6'    => 'Raku'                  ,
 | 
						|
            'raku'     => 'Raku'                  ,
 | 
						|
            'rakudo'   => 'Raku'                  ,
 | 
						|
            'rexx'     => 'Rexx'                  ,
 | 
						|
            'regina'   => 'Rexx'                  ,
 | 
						|
            'ruby'     => 'Ruby'                  ,
 | 
						|
            'sed'      => 'sed'                   ,
 | 
						|
            'sh'       => 'Bourne Shell'          ,
 | 
						|
            'swipl'    => 'Prolog'                ,
 | 
						|
            'tcl'      => 'Tcl/Tk'                ,
 | 
						|
            'tclsh'    => 'Tcl/Tk'                ,
 | 
						|
            'tcsh'     => 'C Shell'               ,
 | 
						|
            'wish'     => 'Tcl/Tk'                ,
 | 
						|
            'zsh'      => 'zsh'                   ,
 | 
						|
            );
 | 
						|
# 1}}}
 | 
						|
%{$rh_Language_by_File}      = (             # {{{1
 | 
						|
            'build.xml'         => 'Ant/XML'            ,
 | 
						|
            'BUILD'             => 'Bazel'              ,
 | 
						|
            'WORKSPACE'         => 'Bazel'              ,
 | 
						|
            'cmakelists.txt'    => 'CMake'              ,
 | 
						|
            'CMakeLists.txt'    => 'CMake'              ,
 | 
						|
            'Jamfile'           => 'Jam'                ,
 | 
						|
            'jamfile'           => 'Jam'                ,
 | 
						|
            'Jamrules'          => 'Jam'                ,
 | 
						|
            'Makefile'          => 'make'               ,
 | 
						|
            'makefile'          => 'make'               ,
 | 
						|
            'meson.build'       => 'Meson'              ,
 | 
						|
            'Gnumakefile'       => 'make'               ,
 | 
						|
            'gnumakefile'       => 'make'               ,
 | 
						|
            'pom.xml'           => 'Maven/XML'          ,
 | 
						|
            'Rakefile'          => 'Ruby'               ,
 | 
						|
            'rakefile'          => 'Ruby'               ,
 | 
						|
            'Snakefile'         => 'Snakemake'          ,
 | 
						|
            'Dockerfile'        => 'Dockerfile'         ,
 | 
						|
            'Dockerfile.m4'     => 'Dockerfile'         ,
 | 
						|
            'Dockerfile.cmake'  => 'Dockerfile'         ,
 | 
						|
            'dockerfile'        => 'Dockerfile'         ,
 | 
						|
            'dockerfile.m4'     => 'Dockerfile'         ,
 | 
						|
            'dockerfile.cmake'  => 'Dockerfile'         ,
 | 
						|
            'Containerfile'     => 'Containerfile'      ,
 | 
						|
            );
 | 
						|
# 1}}}
 | 
						|
%{$rh_Language_by_Prefix}     = (             # {{{1
 | 
						|
            'Dockerfile'        => 'Dockerfile'         ,
 | 
						|
            'Containerfile'     => 'Containerfile'         ,
 | 
						|
            );
 | 
						|
# 1}}}
 | 
						|
%{$rhaa_Filters_by_Language} = (            # {{{1
 | 
						|
    '(unknown)'          => [ ],
 | 
						|
    'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
 | 
						|
    'ActionScript'       => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Apex Class'         => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'AppleScript'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '(*', '*)' ],
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'ASP'                => [   [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
 | 
						|
    'ASP.NET'            => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_between_general', '<%--', '--%>' ],
 | 
						|
                                [ 'remove_between_general', '<!--', '-->' ],
 | 
						|
                            ],
 | 
						|
    'Ada'                => [   [ 'remove_matches'      , '^\s*--' ], ],
 | 
						|
    'ADSO/IDSM'          => [   [ 'remove_matches'      , '^\s*\*[\+\!]' ], ],
 | 
						|
    'Agda'               => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | 
						|
    'AMPLE'              => [   [ 'remove_matches'      , '^\s*//' ], ],
 | 
						|
    'APL'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*⍝' ],
 | 
						|
                            ],
 | 
						|
    'AnsProlog'          => [
 | 
						|
                                [ 'remove_between_general', '%*', '*%' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\%' ],
 | 
						|
                                [ 'remove_inline'       , '(//|\%).*$' ],
 | 
						|
                            ],
 | 
						|
    'Ant/XML'            => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'ANTLR Grammar'      => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Ant'                => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Apex Trigger'       => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Arduino Sketch'     => [ # Arduino IDE inserts problematic 0xA0 characters; strip them
 | 
						|
                                [ 'replace_regex' , '\xa0', " " ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'ArkTs'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Arturo'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'AsciiDoc'           => [
 | 
						|
                                [ 'remove_between_general', '////', '////' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\/\/'  ],
 | 
						|
                            ],
 | 
						|
    'AspectJ'            => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Assembly'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\@' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\|' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*!'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                                [ 'remove_inline'       , '\@.*$'  ],
 | 
						|
                                [ 'remove_inline'       , '\|.*$'  ],
 | 
						|
                                [ 'remove_inline'       , '!.*$'   ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                                [ 'remove_matches'      , '^\*'    ],  # z/OS Assembly
 | 
						|
                            ],
 | 
						|
    'Astro'              => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Asymptote'          => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'AutoHotkey'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'awk'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Bazel'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'bc'                 => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Blade'              => [
 | 
						|
                                [ 'remove_between_general', '{{--', '--}}' ],
 | 
						|
                                [ 'remove_html_comments',                  ],
 | 
						|
                            ],
 | 
						|
    'Bourne Again Shell' => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Bourne Shell'       => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Brainfuck'          => [ # puerile name for a language
 | 
						|
#                               [ 'call_regexp_common'  , 'Brainfuck' ],  # inaccurate
 | 
						|
                                [ 'remove_bf_comments',               ],
 | 
						|
                            ],
 | 
						|
    'BrightScript'       => [
 | 
						|
                                [ 'remove_matches'      , '^\s*rem', ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\'',  ],
 | 
						|
                            ],
 | 
						|
    'builder'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*xml_markup.comment!'  ],
 | 
						|
                            ],
 | 
						|
    'C'                  => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Chapel'       => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'C++'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'C/C++ Header'       => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Carbon'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Clean'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Clojure'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#_'  ],
 | 
						|
                            ],
 | 
						|
    'ClojureScript'      => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | 
						|
    'ClojureC'           => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | 
						|
    'CMake'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Crystal'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Constraint Grammar' => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'CUDA'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Cython'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'C#/Smalltalk' => [ [ 'die' ,  ], ], # never called
 | 
						|
    'C#'                 => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'C# Designer'        => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'C# Generated'        => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Cake Build Script'  => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'CCS'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'CSS'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'CSV'                => [  # comma separated value files have no comments;
 | 
						|
                                [ 'remove_matches'      , '^\s*$'  ],
 | 
						|
                            ], # included simply to allow diff's
 | 
						|
    'COBOL'              => [   [ 'remove_cobol_comments',         ], ],
 | 
						|
    'CoCoA 5'            => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'CoffeeScript'       => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'ColdFusion'         => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'ColdFusion CFScript'=> [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Containerfile'      => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Coq'                => [
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                            ],
 | 
						|
    'Crystal Reports'    => [   [ 'remove_matches'      , '^\s*//' ], ],
 | 
						|
    'CSON'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Cucumber'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                            ],
 | 
						|
    'D/dtrace'           => [ [ 'die' ,          ], ], # never called
 | 
						|
    'D'                  => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/+', '+/' ],
 | 
						|
                                [ 'remove_between_general', '/+', '+/' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'DAL'                => [
 | 
						|
                                [ 'remove_between_general', '[', ']', ],
 | 
						|
                            ],
 | 
						|
    'Dart'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'DenizenScript'      => [ # same as YAML
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Derw'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_between_general', '{-', '-}' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Dafny'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'dhall'              => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | 
						|
    'Delphi Form'        => [ # same as Pascal
 | 
						|
                                [ 'remove_between_regex', '\{[^$]', '}' ],
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
    'DIET'               => [  # same as Pug
 | 
						|
                                [ 'remove_pug_block'    ,          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    # diff is kind of weird: anything but a space in the first column
 | 
						|
    # will count as code, with the exception of #, ---, +++.  Spaces
 | 
						|
    # in the first column denote context lines which aren't part of the
 | 
						|
    # difference.
 | 
						|
    'diff'               => [
 | 
						|
                                [ 'remove_matches'      , '^#' ],
 | 
						|
                                [ 'remove_matches'      , '^\-\-\-' ],
 | 
						|
                                [ 'remove_matches'      , '^\+\+\+' ],
 | 
						|
                                [ 'remove_matches'      , '^\s' ],
 | 
						|
                            ],
 | 
						|
    'DITA'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'DOORS Extension Language' => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Drools'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'dtrace'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'ECPP'               => [
 | 
						|
                                [ 'remove_between_general',
 | 
						|
                                  '<%doc>', '</%doc>',             ],
 | 
						|
                                [ 'remove_between_general',
 | 
						|
                                  '<#'    , '#>'     ,             ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'EEx'                => [
 | 
						|
                                [ 'remove_between_general', '<%#', '%>' ],
 | 
						|
                            ],
 | 
						|
    'EJS'                => [
 | 
						|
                                [ 'remove_between_general', '<%#', '%>' ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                            ],
 | 
						|
    'Elm'                => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | 
						|
    'Embedded Crystal'   => [
 | 
						|
                                [ 'remove_between_general', '<%#', '%>' ],
 | 
						|
                            ],
 | 
						|
    'ERB'                => [
 | 
						|
                                [ 'remove_between_general', '<%#', '%>' ],
 | 
						|
                            ],
 | 
						|
    'Gencat NLS'         => [   [ 'remove_matches'       , '^\$ .*$' ], ],
 | 
						|
    'NASTRAN DMAP'       => [
 | 
						|
                                [ 'remove_matches'      , '^\s*\$' ],
 | 
						|
                                [ 'remove_inline'       , '\$.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Dockerfile'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'DOS Batch'          => [
 | 
						|
                                [ 'remove_matches'      , '^\s*rem' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*::'  ],
 | 
						|
                            ],
 | 
						|
    'DTD'                => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'Elixir'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'elixir_doc_to_C'                ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Erlang'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*%'  ],
 | 
						|
                                [ 'remove_inline'       , '%.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Expect'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Fennel'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Finite State Language' => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Fish Shell'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Focus'              => [   [ 'remove_matches'      , '^\s*\-\*'  ], ],
 | 
						|
    'Forth'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*\\\\.*$'  ],
 | 
						|
                                [ 'Forth_paren_to_C'                 ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'      ],
 | 
						|
                                [ 'remove_inline'       , '\\\\.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Fortran 77'         => [
 | 
						|
                                [ 'remove_f77_comments' ,          ],
 | 
						|
                                [ 'remove_inline'       , '\!.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Fortran 77/Forth'   => [ [ 'die' ,          ], ], # never called
 | 
						|
    'F#/Forth'           => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Fortran 90'         => [
 | 
						|
                                [ 'remove_f77_comments' ,          ],
 | 
						|
                                [ 'remove_f90_comments' ,          ],
 | 
						|
                                [ 'remove_inline'       , '\!.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Fortran 95'         => [
 | 
						|
                                [ 'remove_f77_comments' ,          ],
 | 
						|
                                [ 'remove_f90_comments' ,          ],
 | 
						|
                                [ 'remove_inline'       , '\!.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Freemarker Template' => [
 | 
						|
                                [ 'remove_between_general', '<#--', '-->' ],
 | 
						|
                            ],
 | 
						|
    'Futhark'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--'  ],
 | 
						|
                            ],
 | 
						|
    'FXML'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'F#'                 => [
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
    'F# Script'          => [
 | 
						|
                                [ 'call_regexp_common'  , 'Pascal' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
    'Flatbuffers'        => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Godot Scene'        => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                            ],
 | 
						|
    'Godot Resource'     => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                            ],
 | 
						|
    'Godot Shaders'      => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'GDScript'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Glade'              => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Gleam'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Glimmer JavaScript' => [
 | 
						|
                                [ 'remove_between_general', '{{!', '}}' ],
 | 
						|
                                [ 'remove_between_general', '<!--', '-->' ],
 | 
						|
                                [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', "'", '//', '' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Glimmer TypeScript' => [
 | 
						|
                                [ 'remove_between_general', '{{!', '}}' ],
 | 
						|
                                [ 'remove_between_general', '<!--', '-->' ],
 | 
						|
                                [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', "'", '//', '' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'GLSL'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Go'                 => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '["`]', '*/*', '' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Gradle'             => [ # same as Groovy
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                                # separate /* inside quoted strings with two
 | 
						|
                                # concatenated strings split between / and *
 | 
						|
                                [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | 
						|
                                  '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | 
						|
                                [ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
 | 
						|
                                [ 'rm_comments_in_strings', "'''", '//', '', 1],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Grails'             => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'remove_jsp_comments' ,          ],
 | 
						|
                                [ 'add_newlines'        ,          ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'GraphQL'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Groovy'             => [
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                                # separate /* inside quoted strings with two
 | 
						|
                                # concatenated strings split between / and *
 | 
						|
                                [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | 
						|
                                  '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | 
						|
                                [ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
 | 
						|
                                [ 'rm_comments_in_strings', "'''", '//', '', 1],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Haml'               => [
 | 
						|
                                [ 'remove_haml_block'   ,          ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*/\s*\S+' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*-#\s*\S+' ],
 | 
						|
                            ],
 | 
						|
    'Handlebars'         => [
 | 
						|
                                [ 'remove_between_general', '{{!--', '--}}' ],
 | 
						|
                                [ 'remove_between_general', '{{!', '}}' ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                            ],
 | 
						|
    'Harbour'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*\&\&' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\*' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*NOTE' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*note' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Note' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                                [ 'remove_inline'       , '\&\&.*$' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Hare'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'remove_matches'      , '//.*$' ],
 | 
						|
                            ],
 | 
						|
    'Haxe'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'HCL'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'HLSL'               => [
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'HTML'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'HTML EEx'           => [
 | 
						|
                                [ 'remove_matches'       , '^\s*<% #' ],
 | 
						|
                                [ 'remove_between_general', '<%!--', '--%>' ],
 | 
						|
                            ],
 | 
						|
    'HolyC'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Hoon'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*:[:><]' ],
 | 
						|
                                [ 'remove_inline'       , ':[:><].*$'  ],
 | 
						|
                            ],
 | 
						|
    'INI'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                            ],
 | 
						|
    'XHTML'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Haskell'            => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | 
						|
    'IDL'                => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | 
						|
    'IDL/Qt Project/Prolog/ProGuard' => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Idris'              => [
 | 
						|
                                [ 'remove_haskell_comments', '>filename<' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\|{3}' ],
 | 
						|
                            ],
 | 
						|
    'Igor Pro'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Literate Idris'     => [
 | 
						|
                                [ 'remove_matches'      , '^[^>]'  ],
 | 
						|
                            ],
 | 
						|
    'Imba'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#\s'],
 | 
						|
                                [ 'remove_inline'       , '#\s.*$' ],
 | 
						|
                                [ 'remove_between_regex', '###', '###' ],
 | 
						|
                            ],
 | 
						|
    'InstallShield'      => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'IPL'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Jai'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Jam'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Janet'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'JSP'                => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'remove_jsp_comments' ,          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'add_newlines'        ,          ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'JavaServer Faces'   => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Java'               => [
 | 
						|
								[ 'docstring_rm_comments', ],
 | 
						|
                                [ 'replace_regex', '\\\\$', ' '],
 | 
						|
                                # Java seems to have more path globs in strings
 | 
						|
                                # than other languages.  The variations makes
 | 
						|
                                # it tricky to craft a universal fix.
 | 
						|
                                # \1 is a backreference to the first group, meaning
 | 
						|
                                # either single or double quote
 | 
						|
                                [ 'replace_between_regex', '(["\'])(.*?/\*)(.*?)\1',
 | 
						|
                                  '(.*?)' , '"xx"'],
 | 
						|
                                [ 'replace_between_regex', '(["\'])(.*?\*/)(.*?)\1',
 | 
						|
                                  '(.*?)' , '"xx"'],
 | 
						|
                               ## separate /* inside quoted strings with two
 | 
						|
                               ## concatenated strings split between / and *
 | 
						|
                               ##    -> defeated by "xx/**/*_xx" issue 365
 | 
						|
                               #[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | 
						|
                               #  '(.*?)' , '"$1$2$1 + $1$3$1$4"'],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'JavaScript'         => [
 | 
						|
                                [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', "'", '//', '' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Jinja Template'     => [
 | 
						|
                                [ 'remove_between_general', '{#', '#}' ],
 | 
						|
                            ],
 | 
						|
    'JSX'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'JCL'                => [   [ 'remove_jcl_comments' ,          ], ],
 | 
						|
    'JSON'               => [   # ECMA-404, the JSON standard definition
 | 
						|
                                # makes no provision for JSON comments
 | 
						|
                                # so just use a placeholder filter
 | 
						|
                                [ 'remove_matches'      , '^\s*$'  ],
 | 
						|
                            ],
 | 
						|
    'JSON5'              => [   # same as JavaScript
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Julia'              => [
 | 
						|
                                [ 'remove_between_general', '#=', '=#' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Juniper Junos'      => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'kvlang'             => [
 | 
						|
                                ["remove_matches", '^\s*#[^:]'],
 | 
						|
                            ],
 | 
						|
    'Kotlin'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | 
						|
                                [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Lean'               => [
 | 
						|
                                [ 'remove_between_general', '/-', '-/' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Lem'                => [
 | 
						|
                                [ 'remove_OCaml_comments',         ],
 | 
						|
                            ],
 | 
						|
    'LESS'               => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'LFE'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_between_general', '#|', '|#' ],
 | 
						|
                            ],
 | 
						|
    'Linker Script'      => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/'],
 | 
						|
                                [ 'call_regexp_common',     'C'            ],
 | 
						|
                            ],
 | 
						|
    'liquid'             => [
 | 
						|
                                [ 'remove_between_general', '{% comment %}',
 | 
						|
                                                            '{% endcomment %}' ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                            ],
 | 
						|
    'Lisp'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_between_general', '#|', '|#' ],
 | 
						|
                            ],
 | 
						|
    'Lisp/OpenCL'        => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Lisp/Julia'         => [ [ 'die' ,          ], ], # never called
 | 
						|
    'LiveLink OScript'   => [   [ 'remove_matches'      , '^\s*//' ], ],
 | 
						|
    'LLVM IR'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Logos'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Logtalk'            => [  # same filters as Prolog
 | 
						|
                                [ 'remove_matches'      , '^\s*\%' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '(//|\%).*$' ],
 | 
						|
                            ],
 | 
						|
    'Lua'                => [
 | 
						|
                                [ 'remove_between_general', '--[=====[', ']=====]' ],
 | 
						|
                                [ 'remove_between_general', '--[====[', ']====]' ],
 | 
						|
                                [ 'remove_between_general', '--[===[', ']===]' ],
 | 
						|
                                [ 'remove_between_general', '--[==[', ']==]' ],
 | 
						|
                                [ 'remove_between_general', '--[=[', ']=]' ],
 | 
						|
                                [ 'remove_between_general', '--[[', ']]' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\-\-' ],
 | 
						|
                            ],
 | 
						|
    'Luau'               => [
 | 
						|
                                [ 'remove_between_general', '--[=====[', ']=====]' ],
 | 
						|
                                [ 'remove_between_general', '--[====[', ']====]' ],
 | 
						|
                                [ 'remove_between_general', '--[===[', ']===]' ],
 | 
						|
                                [ 'remove_between_general', '--[==[', ']==]' ],
 | 
						|
                                [ 'remove_between_general', '--[=[', ']=]' ],
 | 
						|
                                [ 'remove_between_general', '--[[', ']]' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\-\-' ],
 | 
						|
                            ],
 | 
						|
    'make'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Meson'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'MATLAB'             => [
 | 
						|
                                [ 'remove_between_general', '%{', '%}' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*%'  ],
 | 
						|
                                [ 'remove_inline'       , '%.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Mathematica'        => [
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                            ],
 | 
						|
    'Maven/XML'          => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Maven'              => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Mercury'            => [
 | 
						|
                                [ 'remove_inline'       , '%.*$'   ],
 | 
						|
                                [ 'remove_matches'      , '^\s*%'  ],
 | 
						|
                            ],
 | 
						|
    'Metal'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Modelica'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Modula3'            => [   [ 'call_regexp_common'  , 'Pascal' ], ],
 | 
						|
        # Modula 3 comments are (* ... *) so applying the Pascal filter
 | 
						|
        # which also treats { ... } as a comment is not really correct.
 | 
						|
    'Mojom'              => [   [ 'call_regexp_common' , 'C++' ], ],
 | 
						|
    'Mojo'               => [
 | 
						|
                                [ 'remove_matches'      , '/\*'    ],
 | 
						|
                                [ 'remove_matches'      , '\*/'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Nemerle'            => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Nunjucks'           => [
 | 
						|
                                [ 'remove_between_general', '{#', '#}' ],
 | 
						|
                            ],
 | 
						|
    'Objective-C'        => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Objective-C++'      => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'OCaml'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '(*', '*)', 1 ],
 | 
						|
                                [ 'remove_OCaml_comments',         ],
 | 
						|
                            ],
 | 
						|
    'OpenCL'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'PHP/Pascal/Fortran'       => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Mako'               => [
 | 
						|
                                [ 'remove_matches'       , '##.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Markdown'           => [
 | 
						|
                                [ 'remove_between_general', '<!--', '-->' ],
 | 
						|
                                [ 'remove_between_regex',
 | 
						|
                                  '\[(comment|\/\/)?\]\s*:?\s*(<\s*>|#)?\s*\(.*?', '.*?\)' ],
 | 
						|
                                # http://stackoverflow.com/questions/4823468/comments-in-markdown
 | 
						|
                            ],
 | 
						|
    'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => [ [ 'die' ,          ], ], # never called
 | 
						|
    'MUMPS'              => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | 
						|
    'Mustache'           => [
 | 
						|
                                [ 'remove_between_general', '{{!', '}}' ],
 | 
						|
                            ],
 | 
						|
    'Nickel'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Nim'                => [
 | 
						|
                                [ 'remove_between_general', '#[', ']#' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
#                               [ 'docstring_to_C'                 ],
 | 
						|
#                               [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'NetLogo'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Nix'                => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Octave'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Odin'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'OpenSCAD'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Oracle Forms'       => [   [ 'call_regexp_common'  , 'C'      ], ],
 | 
						|
    'Oracle Reports'     => [   [ 'call_regexp_common'  , 'C'      ], ],
 | 
						|
    'Oracle PL/SQL'      => [
 | 
						|
                                [ 'call_regexp_common'  , 'PL/SQL'      ],
 | 
						|
                            ],
 | 
						|
    'Pascal'             => [
 | 
						|
                                [ 'remove_between_regex', '\{[^$]', '}' ],
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
####'Pascal'             => [
 | 
						|
####                            [ 'call_regexp_common'  , 'Pascal' ],
 | 
						|
####                            [ 'remove_matches'      , '^\s*//' ],
 | 
						|
####                        ],
 | 
						|
    'Pascal/Puppet'            => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Puppet'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'   ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'       ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'P4'                 => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Patran Command Language'=> [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'   ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\$#' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'       ],
 | 
						|
                            ],
 | 
						|
    'Perl'               => [   [ 'remove_below'        , '^__(END|DATA)__'],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_below_above'  , '^=head1', '^=cut'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'PEG'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'peg.js'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'peggy'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Pest'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//'  ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Perl/Prolog'        => [ [ 'die' ,          ], ], # never called
 | 
						|
    'PL/I'               => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'PL/M'               => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'PlantUML'           => [
 | 
						|
                                [ 'remove_between_general', "/'", "'/" ],
 | 
						|
                                [ 'remove_matches'      , "^\\s*'" ],
 | 
						|
                            ],
 | 
						|
    'Pig Latin'          => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'       ],
 | 
						|
                            ],
 | 
						|
    'Prisma Schema'      => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
    'Processing'         => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'ProGuard'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'PO File'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#[^,]' ],  # '#,' is not a comment
 | 
						|
                            ],
 | 
						|
    'Pony'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'PowerBuilder'       => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'PowerShell'         => [
 | 
						|
                                [ 'powershell_to_C'                ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Prolog'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*\%' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '(//|\%).*$' ],
 | 
						|
                            ],
 | 
						|
    'Properties'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*!'  ],
 | 
						|
                            ],
 | 
						|
    'Protocol Buffers'   => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'PRQL'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                            ],
 | 
						|
    'Pug'                => [
 | 
						|
                                [ 'remove_pug_block'    ,          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'PureScript'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_between_general', '{-', '-}' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Python'             => [
 | 
						|
                                [ 'remove_matches'      , '/\*'    ],
 | 
						|
                                [ 'remove_matches'      , '\*/'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Jupyter Notebook'   => [   # these are JSON files; have no comments
 | 
						|
                                # would have to parse JSON for
 | 
						|
                                #      "cell_type": "code"
 | 
						|
                                # to count code lines
 | 
						|
                                [ 'jupyter_nb'                     ],
 | 
						|
                                [ 'remove_matches'      , '^\s*$'  ],
 | 
						|
                            ],
 | 
						|
    'PHP'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'QML'                => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Qt'                 => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Qt Linguist'        => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Qt Project'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'R'                  => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Ring'               => [
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Rmd'                => [
 | 
						|
                                [ 'reduce_to_rmd_code_blocks'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Racket'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Raku'               => [   [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_below_above'  , '^=head1', '^=cut'  ],
 | 
						|
                                [ 'remove_below_above'  , '^=begin', '^=end'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Raku/Prolog'        => [ [ 'die' ,          ], ], # never called
 | 
						|
    'RAML'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'RapydScript'        => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Razor'              => [
 | 
						|
                                [ 'remove_between_general', '@*', '*@' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_between_general', '<!--', '-->' ],
 | 
						|
                            ],
 | 
						|
    'reStructuredText'   => [
 | 
						|
                                [ 'remove_between_regex', '^\.\.', '^[^ \n\t\r\f\.]' ]
 | 
						|
                            ],
 | 
						|
    'Rexx'               => [   [ 'call_regexp_common'  , 'C'      ], ],
 | 
						|
    'ReasonML'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'ReScript'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'RobotFramework'     => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'   ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Comment' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
 | 
						|
                                [ 'remove_matches'      , '^\s*\[(Documentation|Tags)\]' ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Ruby'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_below_above'  , '^=begin', '^=end' ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Ruby HTML'          => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'Circom'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Cairo'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Rust'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'SaltStack'          => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'SAS'                => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_between_general', '*', ';' ],
 | 
						|
                            ],
 | 
						|
    'Sass'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Scala'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Scheme/SaltStack' => [ [ 'die' ,          ], ], # never called
 | 
						|
    'Scheme'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'   ],
 | 
						|
                            ],
 | 
						|
    'SCSS'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'sed'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Slice'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Slim'               => [
 | 
						|
                                [ 'remove_slim_block'   ,          ],
 | 
						|
                            ],
 | 
						|
    'Slint'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'SKILL'              => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                            ],
 | 
						|
    'SKILL++'            => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                            ],
 | 
						|
    'Squirrel'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Starlark'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'BizTalk Pipeline' =>   [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'BizTalk Orchestration' => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Solidity'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'SparForte'          => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#!' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                            ],
 | 
						|
    'Specman e'          => [
 | 
						|
                                [ 'pre_post_fix'        , "'>", "<'"],
 | 
						|
                                [ 'remove_between_general', "^'>", "^<'" ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++',   ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'rm_last_line'        , ],  # undo pre_post_fix addition
 | 
						|
                                                              # of trailing line of just <'
 | 
						|
                            ],
 | 
						|
    'SQL'                => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'SQL Stored Procedure'=> [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'SQL Data'           => [
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Smalltalk'          => [
 | 
						|
                                [ 'call_regexp_common'  , 'Smalltalk'      ],
 | 
						|
                            ],
 | 
						|
    'Smarty'             => [
 | 
						|
                                [ 'smarty_to_C'                    ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'Snakemake'          => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Standard ML'        => [
 | 
						|
                                [ 'remove_between_general', '(*', '*)' ],
 | 
						|
                            ],
 | 
						|
    'Stata'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Stylus'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'SugarSS'            => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Svelte'             => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'SVG'                => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Swift'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'SWIG'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
 | 
						|
    'm4'                 => [   [ 'remove_matches'      , '^dnl\s'  ], ],
 | 
						|
    'C Shell'            => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Kermit'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*;'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Korn Shell'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'TableGen'           => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Tcl/Tk'             => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'TEAL'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                            ],
 | 
						|
    'Teamcenter met'     => [   [ 'call_regexp_common'  , 'C'      ], ],
 | 
						|
    'Teamcenter mth'     => [   [ 'remove_matches'      , '^\s*#'  ], ],
 | 
						|
    'TeX'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*%'  ],
 | 
						|
                                [ 'remove_inline'       , '%.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Text'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*$'  ],
 | 
						|
                            ],
 | 
						|
    'TLA+'               => [
 | 
						|
                                [ 'remove_TLAPlus_generated_code'                 ],
 | 
						|
                                [ 'remove_matches'               , '^\\s*\\\\\\*' ],
 | 
						|
                                [ 'remove_TLAPlus_comments'                       ],
 | 
						|
                            ],
 | 
						|
    'Thrift'             => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Titanium Style Sheet'  => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'  ],
 | 
						|
                                [ 'remove_between_regex', '/[^/]', '[^/]/' ],
 | 
						|
                            ],
 | 
						|
    'TNSDL'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'      ],
 | 
						|
                            ],
 | 
						|
    'TOML'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'TTCN'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'      ],
 | 
						|
                            ],
 | 
						|
    'TITAN Project File Information'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'tspeg'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Twig'               => [
 | 
						|
                                [ 'remove_between_general', '{#', '#}' ],
 | 
						|
                            ],
 | 
						|
    'TypeScript'         => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Typst'              => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Umka'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Unity-Prefab'       => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Visual Fox Pro'     =>  [
 | 
						|
                                [ 'remove_matches'      , '^\s*\*' ],
 | 
						|
                                [ 'remove_inline'       , '\*.*$'  ],
 | 
						|
                                [ 'remove_matches'      , '^\s*&&' ],
 | 
						|
                                [ 'remove_inline'       , '&&.*$'  ],
 | 
						|
                            ],
 | 
						|
    'Softbridge Basic'   => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
 | 
						|
                                [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
 | 
						|
    # http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
 | 
						|
    'Vala'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Vala Header'        => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Verilog-SystemVerilog/Coq' => [ ['die'] ], # never called
 | 
						|
    'Verilog-SystemVerilog' => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'VHDL'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*--' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_inline'       , '--.*$'  ],
 | 
						|
                            ],
 | 
						|
    'vim script'         => [
 | 
						|
                                [ 'remove_matches'      , '^\s*"'  ],
 | 
						|
                                [ 'remove_inline'       , '".*$'   ],
 | 
						|
                            ],
 | 
						|
    'Visual Basic Script' => [
 | 
						|
                                [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
 | 
						|
                                [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | 
						|
                            ],
 | 
						|
    'Visual Basic .NET' => [
 | 
						|
                                [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
 | 
						|
                                [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | 
						|
                            ],
 | 
						|
    'VB for Applications' => [
 | 
						|
                                [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
 | 
						|
                                [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | 
						|
                            ],
 | 
						|
    'Visual Basic'       => [
 | 
						|
                                [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Attribute\s+'],
 | 
						|
                                [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | 
						|
                            ],
 | 
						|
    'Visualforce Component' => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Visualforce Page'   => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Velocity Template Language' => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'remove_jsp_comments' ,          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*##' ],
 | 
						|
                                [ 'remove_between_general', '#**', '*#' ],
 | 
						|
                                [ 'add_newlines'        ,          ],
 | 
						|
                            ],
 | 
						|
    'Vuejs Component'     => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Vyper'              => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'docstring_to_C'                 ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'Teamcenter def'     => [   [ 'remove_matches'      , '^\s*#'  ], ],
 | 
						|
    'Windows Module Definition' => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;' ],
 | 
						|
                                [ 'remove_inline'       , ';.*$'  ],
 | 
						|
                            ],
 | 
						|
    'yacc'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'YAML'               => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    'lex'                => [   [ 'call_regexp_common'  , 'C'      ], ],
 | 
						|
    'XAML'               => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'xBase Header'       => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\&\&' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\*' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*NOTE' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*note' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Note' ],
 | 
						|
                                [ 'remove_inline'       , '\&\&.*$' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'xBase'              => [
 | 
						|
#                               [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\&\&' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\*' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*NOTE' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*note' ],
 | 
						|
                                [ 'remove_matches'      , '^\s*Note' ],
 | 
						|
                                [ 'remove_inline'       , '\&\&.*$' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'MXML'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                                [ 'remove_matches'      , '^\s*//' ],
 | 
						|
                                [ 'add_newlines'        ,          ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'Web Services Description' => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'WebAssembly'           => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;;' ],
 | 
						|
                            ],
 | 
						|
    'WGSL'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Windows Message File'  => [
 | 
						|
                                [ 'remove_matches'      , '^\s*;\s*//' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'          ],
 | 
						|
                                [ 'remove_matches'      , '^\s*;\s*$'  ],
 | 
						|
#                               next line only hypothetical
 | 
						|
#                               [ 'remove_matches_2re'  , '^\s*;\s*/\*',
 | 
						|
#                                                         '^\s*;\s*\*/', ],
 | 
						|
                            ],
 | 
						|
    'Windows Resource File' => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'WiX source'         => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'WiX include'        => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'WiX string localization' => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'WXML'               => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'WXSS'               => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C'      ],
 | 
						|
                            ],
 | 
						|
    'X++'                => [
 | 
						|
                                [ 'remove_matches', '\s*#\s*//' ],
 | 
						|
                                [ 'remove_between_regex', '#\s*/\*', '\*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'XMI'                => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'XML'                => [
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'XQuery'             => [
 | 
						|
                                [ 'remove_between_general', '(:', ':)' ],
 | 
						|
                            ],
 | 
						|
    'XSD'                => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'XSLT'               => [   [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'Xtend'              => [   # copy of Java, plus triple << inline
 | 
						|
                                # separate /* inside quoted strings with two
 | 
						|
                                # concatenated strings split between / and *
 | 
						|
                                [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | 
						|
                                  '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                                [ 'remove_matches'      , '^\s*\x{c2ab}{3}'  ], # doesn't work
 | 
						|
                                # \xCA2B is unicode << character
 | 
						|
                            ],
 | 
						|
    'NAnt script'       => [    [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'MSBuild script'    => [    [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ], ],
 | 
						|
    'Visual Studio Module' => [
 | 
						|
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | 
						|
                                [ 'rm_comments_in_strings', '"', '//', '' ],
 | 
						|
                                [ 'call_regexp_common'  , 'C++'    ],
 | 
						|
                            ],
 | 
						|
    'Visual Studio Solution' => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_html_comments',          ],
 | 
						|
                                [ 'call_regexp_common'  , 'HTML'   ],
 | 
						|
                            ],
 | 
						|
    'Zig'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*//'  ],
 | 
						|
                                [ 'remove_inline'       , '//.*$'   ],
 | 
						|
                            ],
 | 
						|
    'zsh'                => [
 | 
						|
                                [ 'remove_matches'      , '^\s*#'  ],
 | 
						|
                                [ 'remove_inline'       , '#.*$'   ],
 | 
						|
                            ],
 | 
						|
    );
 | 
						|
# 1}}}
 | 
						|
%{$rh_EOL_continuation_re} = (               # {{{1
 | 
						|
    'ActionScript'       =>     '\\\\$'         ,
 | 
						|
    'AspectJ'            =>     '\\\\$'         ,
 | 
						|
    'Assembly'           =>     '\\\\$'         ,
 | 
						|
    'ASP'                =>     '\\\\$'         ,
 | 
						|
    'ASP.NET'            =>     '\\\\$'         ,
 | 
						|
    'Ada'                =>     '\\\\$'         ,
 | 
						|
    'awk'                =>     '\\\\$'         ,
 | 
						|
    'bc'                 =>     '\\\\$'         ,
 | 
						|
    'C'                  =>     '\\\\$'         ,
 | 
						|
    'C++'                =>     '\\\\$'         ,
 | 
						|
    'C/C++ Header'       =>     '\\\\$'         ,
 | 
						|
    'CMake'              =>     '\\\\$'         ,
 | 
						|
    'Cython'             =>     '\\\\$'         ,
 | 
						|
    'C#'                 =>     '\\\\$'         ,
 | 
						|
    'C# Designer'        =>     '\\\\$'         ,
 | 
						|
    'Cake Build Script'  =>     '\\\\$'         ,
 | 
						|
    'D'                  =>     '\\\\$'         ,
 | 
						|
    'Dart'               =>     '\\\\$'         ,
 | 
						|
    'Expect'             =>     '\\\\$'         ,
 | 
						|
    'Futhark'            =>     '\\\\$'         ,
 | 
						|
    'Gencat NLS'         =>     '\\\\$'         ,
 | 
						|
    'Go'                 =>     '\\\\$'         ,
 | 
						|
    'IDL'                =>     '\$\\$'         ,
 | 
						|
    'Igor Pro'           =>     '\\$'           ,
 | 
						|
#   'Java'               =>     '\\\\$'         ,
 | 
						|
    'JavaScript'         =>     '\\\\$'         ,
 | 
						|
    'JSON5'              =>     '\\\\$'         ,
 | 
						|
    'JSX'                =>     '\\\\$'         ,
 | 
						|
    'LESS'               =>     '\\\\$'         ,
 | 
						|
    'Lua'                =>     '\\\\$'         ,
 | 
						|
    'Luau'               =>     '\\\\$'         ,
 | 
						|
    'make'               =>     '\\\\$'         ,
 | 
						|
    'MATLAB'             =>     '\.\.\.\s*$'    ,
 | 
						|
    'Meson'              =>     '\\\\$'         ,
 | 
						|
    'Metal'              =>     '\\\\$'         ,
 | 
						|
    'MXML'               =>     '\\\\$'         ,
 | 
						|
    'Objective-C'        =>     '\\\\$'         ,
 | 
						|
    'Objective-C++'      =>     '\\\\$'         ,
 | 
						|
    'OCaml'              =>     '\\\\$'         ,
 | 
						|
    'Octave'             =>     '\.\.\.\s*$'    ,
 | 
						|
    'Qt Project'         =>     '\\\\$'         ,
 | 
						|
    'Patran Command Language'=> '\\\\$'         ,
 | 
						|
    'PowerBuilder'       =>     '\\\\$'         ,
 | 
						|
    'PowerShell'         =>     '\\\\$'         ,
 | 
						|
    'Python'             =>     '\\\\$'         ,
 | 
						|
    'R'                  =>     '\\\\$'         ,
 | 
						|
    'Rmd'                =>     '\\\\$'         ,
 | 
						|
    'Ruby'               =>     '\\\\$'         ,
 | 
						|
    'sed'                =>     '\\\\$'         ,
 | 
						|
    'Swift'              =>     '\\\\$'         ,
 | 
						|
    'Bourne Again Shell' =>     '\\\\$'         ,
 | 
						|
    'Bourne Shell'       =>     '\\\\$'         ,
 | 
						|
    'C Shell'            =>     '\\\\$'         ,
 | 
						|
    'kvlang'             =>     '\\\\$'         ,
 | 
						|
    'Kermit'             =>     '\\\\$'         ,
 | 
						|
    'Korn Shell'         =>     '\\\\$'         ,
 | 
						|
    'Slint'              =>     '\\\\$'         ,
 | 
						|
    'Snakemake'          =>     '\\\\$'         ,
 | 
						|
    'Starlark'           =>     '\\\\$'         ,
 | 
						|
    'Solidity'           =>     '\\\\$'         ,
 | 
						|
    'Stata'              =>     '///$'          ,
 | 
						|
    'Stylus'             =>     '\\\\$'         ,
 | 
						|
    'Tcl/Tk'             =>     '\\\\$'         ,
 | 
						|
    'TTCN'               =>     '\\\\$'         ,
 | 
						|
    'TypeScript'         =>     '\\\\$'         ,
 | 
						|
    'lex'                =>     '\\\\$'         ,
 | 
						|
    'Vala'               =>     '\\\\$'         ,
 | 
						|
    'Vala Header'        =>     '\\\\$'         ,
 | 
						|
    'Vyper'              =>     '\\\\$'         ,
 | 
						|
    'X++'                =>     '\\\\$'         ,
 | 
						|
    'zsh'                =>     '\\\\$'         ,
 | 
						|
    );
 | 
						|
# 1}}}
 | 
						|
%{$rh_Not_Code_Extension}    = (             # {{{1
 | 
						|
   '1'             => 1,  # Man pages (documentation):
 | 
						|
   '2'             => 1,
 | 
						|
   '3'             => 1,
 | 
						|
   '4'             => 1,
 | 
						|
   '5'             => 1,
 | 
						|
   '6'             => 1,
 | 
						|
   '7'             => 1,
 | 
						|
   '8'             => 1,
 | 
						|
   '9'             => 1,
 | 
						|
   'a'             => 1,  # Static object code.
 | 
						|
   'ad'            => 1,  # X application default resource file.
 | 
						|
   'afm'           => 1,  # font metrics
 | 
						|
   'arc'           => 1,  # arc(1) archive
 | 
						|
   'arj'           => 1,  # arj(1) archive
 | 
						|
   'au'            => 1,  # Audio sound filearj(1) archive
 | 
						|
   'bak'           => 1,  # Backup files - we only want to count the "real" files.
 | 
						|
   'bdf'           => 1,
 | 
						|
   'bmp'           => 1,
 | 
						|
   'bz2'           => 1,  # bzip2(1) compressed file
 | 
						|
   'csv'           => 1,
 | 
						|
   'desktop'       => 1,
 | 
						|
   'dic'           => 1,
 | 
						|
   'doc'           => 1,
 | 
						|
   'elc'           => 1,
 | 
						|
   'eps'           => 1,
 | 
						|
   'fig'           => 1,
 | 
						|
   'gif'           => 1,
 | 
						|
   'gz'            => 1,
 | 
						|
   'h5'            => 1,  # hierarchical data format
 | 
						|
   'hdf'           => 1,  # hierarchical data format
 | 
						|
   'in'            => 1,  # Debatable.
 | 
						|
   'jpg'           => 1,
 | 
						|
   'kdelnk'        => 1,
 | 
						|
   'man'           => 1,
 | 
						|
   'mf'            => 1,
 | 
						|
   'mp3'           => 1,
 | 
						|
   'n'             => 1,
 | 
						|
   'o'             => 1,  # Object code is generated from source code.
 | 
						|
   'o.cmd'         => 1,  # not DOS Batch; Linux kernel compilation optimization file
 | 
						|
   'o.d'           => 1,  # cmake object dependency file
 | 
						|
   'pbm'           => 1,
 | 
						|
   'pdf'           => 1,
 | 
						|
   'pfb'           => 1,
 | 
						|
   'png'           => 1,
 | 
						|
   'ppt'           => 1,
 | 
						|
   'pptx'          => 1,
 | 
						|
   'ps'            => 1,  # Postscript is _USUALLY_ generated automatically.
 | 
						|
   'sgm'           => 1,
 | 
						|
   'sgml'          => 1,
 | 
						|
   'so'            => 1,  # Dynamically-loaded object code.
 | 
						|
   'Tag'           => 1,
 | 
						|
   'tfm'           => 1,
 | 
						|
   'tgz'           => 1,  # gzipped tarball
 | 
						|
   'tiff'          => 1,
 | 
						|
   'tsv'           => 1,  # tab separated values
 | 
						|
   'vf'            => 1,
 | 
						|
   'wav'           => 1,
 | 
						|
   'xbm'           => 1,
 | 
						|
   'xls'           => 1,
 | 
						|
   'xlsx'          => 1,
 | 
						|
   'xpm'           => 1,
 | 
						|
   'Y'             => 1,  # file compressed with "Yabba"
 | 
						|
   'Z'             => 1,  # file compressed with "compress"
 | 
						|
   'zip'           => 1,  # zip archive
 | 
						|
   'gitattributes' => 1,
 | 
						|
   'gitignore'     => 1,
 | 
						|
   'gitmodules'    => 1,
 | 
						|
); # 1}}}
 | 
						|
%{$rh_Not_Code_Filename}     = (             # {{{1
 | 
						|
   'AUTHORS'     => 1,
 | 
						|
   'BUGS'        => 1,
 | 
						|
   'BUGS'        => 1,
 | 
						|
   'Changelog'   => 1,
 | 
						|
   'ChangeLog'   => 1,
 | 
						|
   'ChangeLog'   => 1,
 | 
						|
   'Changes'     => 1,
 | 
						|
   'CHANGES'     => 1,
 | 
						|
   'COPYING'     => 1,
 | 
						|
   'COPYING'     => 1,
 | 
						|
   'DESCRIPTION' => 1, # R packages metafile
 | 
						|
   '.cvsignore'  => 1,
 | 
						|
   'Entries'     => 1,
 | 
						|
   'FAQ'         => 1,
 | 
						|
   'INSTALL'     => 1,
 | 
						|
   'MAINTAINERS' => 1,
 | 
						|
   'MD5SUMS'     => 1,
 | 
						|
   'NAMESPACE'   => 1, # R packages metafile
 | 
						|
   'NEWS'        => 1,
 | 
						|
   'readme'      => 1,
 | 
						|
   'Readme'      => 1,
 | 
						|
   'README'      => 1,
 | 
						|
   'README.tk'   => 1, # used in kdemultimedia, it's confusing.
 | 
						|
   'Repository'  => 1,
 | 
						|
   'Root'        => 1, # CVS
 | 
						|
   'TODO'        => 1,
 | 
						|
);
 | 
						|
# 1}}}
 | 
						|
%{$rh_Scale_Factor}          = (             # {{{1
 | 
						|
    '(unknown)'                    =>   0.00,
 | 
						|
    '1st generation default'       =>   0.25,
 | 
						|
    '2nd generation default'       =>   0.75,
 | 
						|
    '3rd generation default'       =>   1.00,
 | 
						|
    '4th generation default'       =>   4.00,
 | 
						|
    '5th generation default'       =>  16.00,
 | 
						|
    'ABAP'                         =>   5.00,
 | 
						|
    'ActionScript'                 =>   1.36,
 | 
						|
    'Ada'                          =>   0.52,
 | 
						|
    'ADSO/IDSM'                    =>   3.00,
 | 
						|
    'Agda'                         =>   2.11,
 | 
						|
    'AMPLE'                        =>   2.00,
 | 
						|
    'AnsProlog'                    =>   1.25,
 | 
						|
    'Ant/XML'                      =>   1.90,
 | 
						|
    'Ant'                          =>   1.90,
 | 
						|
    'ANTLR Grammar'                =>   2.00,
 | 
						|
    'SQL'                          =>   6.15,
 | 
						|
    'SQL Stored Procedure'         =>   6.15,
 | 
						|
    'SQL Data'                     =>   1.00,
 | 
						|
    'Apex Class'                   =>   1.50,
 | 
						|
    'APL'                          =>   2.50,
 | 
						|
    'ArkTs'                        =>   2.50,
 | 
						|
    'Arturo'                       =>   4.00,
 | 
						|
    'AsciiDoc'                     =>   1.50,
 | 
						|
    'AspectJ'                      =>   1.36,
 | 
						|
    'asa'                          =>   1.29,
 | 
						|
    'ASP'                          =>   1.29,
 | 
						|
    'ASP.NET'                      =>   1.29,
 | 
						|
    'CCS'                          =>   5.33,
 | 
						|
    'Apex Trigger'                 =>   1.4 ,
 | 
						|
    'AppleScript'                  =>   4.0 ,
 | 
						|
    'Arduino Sketch'               =>   1.00,
 | 
						|
    'Assembly'                     =>   0.25,
 | 
						|
    'Astro'                        =>   3.0,
 | 
						|
    'Asymptote'                    =>   2.50,
 | 
						|
    'autocoder'                    =>   0.25,
 | 
						|
    'AutoHotkey'                   =>   1.29,
 | 
						|
    'awk'                          =>   3.81,
 | 
						|
    'basic'                        =>   0.75,
 | 
						|
    'Bazel'                        =>   1.00,
 | 
						|
    'bc'                           =>   1.50,
 | 
						|
    'Blade'                        =>   2.00,
 | 
						|
    'bliss'                        =>   0.75,
 | 
						|
    'bmsgen'                       =>   2.22,
 | 
						|
    'bteq'                         =>   6.15,
 | 
						|
    'Brainfuck'                    =>   0.10,
 | 
						|
    'BrightScript'                 =>   2.00,
 | 
						|
    'builder'                      =>   2.00,
 | 
						|
    'C'                            =>   0.77,
 | 
						|
    'c set 2'                      =>   0.88,
 | 
						|
    'C#'                           =>   1.36,
 | 
						|
    'C# Designer'                  =>   1.36,
 | 
						|
    'C# Generated'                 =>   1.36,
 | 
						|
    'Cake Build Script'            =>   1.36,
 | 
						|
    'C++'                          =>   1.51,
 | 
						|
    'Carbon'                       =>   1.51,
 | 
						|
    'ColdFusion'                   =>   4.00,
 | 
						|
    'ColdFusion CFScript'          =>   4.00,
 | 
						|
    'Chapel'                       =>   2.96,
 | 
						|
    'Clean'                        =>   2.50,
 | 
						|
    'Clojure'                      =>   1.25,
 | 
						|
    'ClojureScript'                =>   1.25,
 | 
						|
    'ClojureC'                     =>   1.25,
 | 
						|
    'CMake'                        =>   1.00,
 | 
						|
    'COBOL'                        =>   1.04,
 | 
						|
    'CoCoA 5'                      =>   1.04,
 | 
						|
    'CoffeeScript'                 =>   2.00,
 | 
						|
    'Constraint Grammar'           =>   4.00,
 | 
						|
    'Containerfile'                =>   2.00,
 | 
						|
    'Coq'                          =>   5.00,
 | 
						|
    'Crystal'                      =>   2.50,
 | 
						|
    'Crystal Reports'              =>   4.00,
 | 
						|
    'csl'                          =>   1.63,
 | 
						|
    'CSON'                         =>   2.50,
 | 
						|
    'csp'                          =>   1.51,
 | 
						|
    'cssl'                         =>   1.74,
 | 
						|
    'CSS'                          =>   1.0,
 | 
						|
    'CSV'                          =>   0.1,
 | 
						|
    'Cucumber'                     =>   3.00,
 | 
						|
    'CUDA'                         =>   1.00,
 | 
						|
    'D'                            =>   1.70,
 | 
						|
    'Dafny'                        =>   3.00,
 | 
						|
    'DAL'                          =>   1.50,
 | 
						|
    'Dart'                         =>   2.00,
 | 
						|
    'DenizenScript'                =>   1.00,
 | 
						|
    'Delphi Form'                  =>   2.00,
 | 
						|
    'DIET'                         =>   2.00,
 | 
						|
    'diff'                         =>   1.00,
 | 
						|
    'Derw'                         =>   3.00,
 | 
						|
    'dhall'                        =>   2.11,
 | 
						|
    'DITA'                         =>   1.90,
 | 
						|
    'dtrace'                       =>   2.00,
 | 
						|
    'NASTRAN DMAP'                 =>   2.35,
 | 
						|
    'DOORS Extension Language'     =>   1.50,
 | 
						|
    'Dockerfile'                   =>   2.00,
 | 
						|
    'DOS Batch'                    =>   0.63,
 | 
						|
    'Drools'                       =>   2.00,
 | 
						|
    'ECPP'                         =>   1.90,
 | 
						|
    'eda/sql'                      =>   6.67,
 | 
						|
    'edscheme 3.4'                 =>   1.51,
 | 
						|
    'EEx'                          =>   2.00,
 | 
						|
    'EJS'                          =>   2.50,
 | 
						|
    'Elixir'                       =>   2.11,
 | 
						|
    'Elm'                          =>   2.50,
 | 
						|
    'Embedded Crystal'             =>   2.00,
 | 
						|
    'ERB'                          =>   2.00,
 | 
						|
    'Erlang'                       =>   2.11,
 | 
						|
    'Fennel'                       =>   2.50,
 | 
						|
    'Finite State Language'        =>   2.00,
 | 
						|
    'Focus'                        =>   1.90,
 | 
						|
    'Forth'                        =>   1.25,
 | 
						|
    'Fortran 66'                   =>   0.63,
 | 
						|
    'Fortran 77'                   =>   0.75,
 | 
						|
    'Fortran 90'                   =>   1.00,
 | 
						|
    'Fortran 95'                   =>   1.13,
 | 
						|
    'Fortran II'                   =>   0.63,
 | 
						|
    'foundation'                   =>   2.76,
 | 
						|
    'Freemarker Template'          =>   1.48,
 | 
						|
    'Futhark'                      =>   3.00, # Guessed from value of ML
 | 
						|
    'F#'                           =>   2.50,
 | 
						|
    'F# Script'                    =>   2.50,
 | 
						|
    'Flatbuffers'                  =>   2.50,
 | 
						|
    'Glade'                        =>   2.00,
 | 
						|
    'Gleam'                        =>   2.50,
 | 
						|
    'GLSL'                         =>   2.00,
 | 
						|
    'Glimmer JavaScript'           =>   3.50,
 | 
						|
    'Glimmer TypeScript'           =>   3.50,
 | 
						|
    'gml'                          =>   1.74,
 | 
						|
    'gpss'                         =>   1.74,
 | 
						|
    'guest'                        =>   2.86,
 | 
						|
    'guru'                         =>   1.63,
 | 
						|
    'GDScript'                     =>   2.50,
 | 
						|
    'Godot Scene'                  =>   2.50,
 | 
						|
    'Godot Resource'               =>   2.50,
 | 
						|
    'Godot Shaders'                =>   2.50,
 | 
						|
    'Go'                           =>   2.50,
 | 
						|
    'Gradle'                       =>   4.00,
 | 
						|
    'Grails'                       =>   1.48,
 | 
						|
    'GraphQL'                      =>   4.00,
 | 
						|
    'Groovy'                       =>   4.10,
 | 
						|
    'gw basic'                     =>   0.82,
 | 
						|
    'HCL'                          =>   2.50,
 | 
						|
    'high c'                       =>   0.63,
 | 
						|
    'hlevel'                       =>   1.38,
 | 
						|
    'hp basic'                     =>   0.63,
 | 
						|
    'Haml'                         =>   2.50,
 | 
						|
    'Handlebars'                   =>   2.50,
 | 
						|
    'Harbour'                      =>   2.00,
 | 
						|
    'Hare'                         =>   2.50,
 | 
						|
    'Haskell'                      =>   2.11,
 | 
						|
    'Haxe'                         =>   2.00,
 | 
						|
    'HolyC'                        =>   2.50,
 | 
						|
    'Hoon'                         =>   2.00,
 | 
						|
    'HTML'                         =>   1.90,
 | 
						|
    'HTML EEx'                     =>   3.00,
 | 
						|
    'XHTML'                        =>   1.90,
 | 
						|
    'XMI'                          =>   1.90,
 | 
						|
    'XML'                          =>   1.90,
 | 
						|
    'FXML'                         =>   1.90,
 | 
						|
    'MXML'                         =>   1.90,
 | 
						|
    'XSLT'                         =>   1.90,
 | 
						|
    'DTD'                          =>   1.90,
 | 
						|
    'XSD'                          =>   1.90,
 | 
						|
    'NAnt script'                  =>   1.90,
 | 
						|
    'MSBuild script'               =>   1.90,
 | 
						|
    'Visual Studio Module'         =>   1.00,
 | 
						|
    'Visual Studio Solution'       =>   1.00,
 | 
						|
    'HLSL'                         =>   2.00,
 | 
						|
    'Idris'                        =>   2.00,
 | 
						|
    'Literate Idris'               =>   2.00,
 | 
						|
    'Igor Pro'                     =>   4.00,
 | 
						|
    'Imba'                         =>   3.00,
 | 
						|
    'INI'                          =>   1.00,
 | 
						|
    'InstallShield'                =>   1.90,
 | 
						|
    'IPL'                          =>   2.00,
 | 
						|
    'Jai'                          =>   1.13,
 | 
						|
    'Jam'                          =>   2.00,
 | 
						|
    'Janet'                        =>   3.00,
 | 
						|
    'Java'                         =>   1.36,
 | 
						|
    'JavaScript'                   =>   1.48,
 | 
						|
    'JavaServer Faces'             =>   1.5 ,
 | 
						|
    'Jinja Template'               =>   1.5 ,
 | 
						|
    'JSON'                         =>   2.50,
 | 
						|
    'JSON5'                        =>   2.50,
 | 
						|
    'JSP'                          =>   1.48,
 | 
						|
    'JSX'                          =>   1.48,
 | 
						|
    'Velocity Template Language'   =>   1.00,
 | 
						|
    'JCL'                          =>   1.67,
 | 
						|
    'Juniper Junos'                =>   2.00,
 | 
						|
    'kvlang'                       =>   2.00,
 | 
						|
    'Kermit'                       =>   2.00,
 | 
						|
    'Korn Shell'                   =>   3.81,
 | 
						|
    'Kotlin'                       =>   2.00,
 | 
						|
    'Lean'                         =>   3.00,
 | 
						|
    'LESS'                         =>   1.50,
 | 
						|
    'Lem'                          =>   3.00,
 | 
						|
    'LFE'                          =>   1.25,
 | 
						|
    'Linker Script'                =>   1.00,
 | 
						|
    'liquid'                       =>   3.00,
 | 
						|
    'Lisp'                         =>   1.25,
 | 
						|
    'LiveLink OScript'             =>   3.5 ,
 | 
						|
    'LLVM IR'                      =>   0.90,
 | 
						|
    'Logos'                        =>   2.00,
 | 
						|
    'Logtalk'                      =>   2.00,
 | 
						|
    'm4'                           =>   1.00,
 | 
						|
    'make'                         =>   2.50,
 | 
						|
    'Mako'                         =>   1.50,
 | 
						|
    'Markdown'                     =>   1.00,
 | 
						|
    'mathcad'                      =>  16.00,
 | 
						|
    'Maven'                        =>   1.90,
 | 
						|
    'Meson'                        =>   1.00,
 | 
						|
    'Metal'                        =>   1.51,
 | 
						|
    'Modelica'                     =>   2.00,
 | 
						|
    'MUMPS'                        =>   4.21,
 | 
						|
    'Mustache'                     =>   1.75,
 | 
						|
    'Nastran'                      =>   1.13,
 | 
						|
    'Nemerle'                      =>   2.50,
 | 
						|
    'NetLogo'                      =>   4.00,
 | 
						|
    'Nickel'                       =>   2.00,
 | 
						|
    'Nim'                          =>   2.00,
 | 
						|
    'Nix'                          =>   2.70,
 | 
						|
    'Nunjucks'                     =>   1.5 ,
 | 
						|
    'Objective-C'                  =>   2.96,
 | 
						|
    'Objective-C++'                =>   2.96,
 | 
						|
    'OCaml'                        =>   3.00,
 | 
						|
    'Odin'                         =>   2.00,
 | 
						|
    'OpenSCAD'                     =>   1.00,
 | 
						|
    'Oracle Reports'               =>   2.76,
 | 
						|
    'Oracle Forms'                 =>   2.67,
 | 
						|
    'Oracle Developer/2000'        =>   3.48,
 | 
						|
    'Other'                        =>   1.00,
 | 
						|
    'P4'                           =>   1.5 ,
 | 
						|
    'Pascal'                       =>   0.88,
 | 
						|
    'Patran Command Language'      =>   2.50,
 | 
						|
    'Perl'                         =>   4.00,
 | 
						|
    'PEG'                          =>   3.00,
 | 
						|
    'peg.js'                       =>   3.00,
 | 
						|
    'peggy'                        =>   3.00,
 | 
						|
    'Pest'                         =>   2.00,
 | 
						|
    'tspeg'                        =>   3.00,
 | 
						|
    'Pig Latin'                    =>   1.00,
 | 
						|
    'PL/I'                         =>   1.38,
 | 
						|
    'PL/M'                         =>   1.13,
 | 
						|
    'PlantUML'                     =>   2.00,
 | 
						|
    'Oracle PL/SQL'                =>   2.58,
 | 
						|
    'PO File'                      =>   1.50,
 | 
						|
    'Pony'                         =>   3.00,
 | 
						|
    'PowerBuilder'                 =>   3.33,
 | 
						|
    'PowerShell'                   =>   3.00,
 | 
						|
    'Prisma Schema'                =>   2.50,
 | 
						|
    'Processing'                   =>   2.50,
 | 
						|
    'ProGuard'                     =>   2.50,
 | 
						|
    'Prolog'                       =>   1.25,
 | 
						|
    'Properties'                   =>   1.36,
 | 
						|
    'Protocol Buffers'             =>   2.00,
 | 
						|
    'PRQL'                         =>   3.00,
 | 
						|
    'Pug'                          =>   2.00,
 | 
						|
    'Puppet'                       =>   2.00,
 | 
						|
    'PureScript'                   =>   2.00,
 | 
						|
    'QML'                          =>   1.25,
 | 
						|
    'Qt'                           =>   2.00,
 | 
						|
    'Qt Linguist'                  =>   1.00,
 | 
						|
    'Qt Project'                   =>   1.00,
 | 
						|
    'R'                            =>   3.00,
 | 
						|
    'Rmd'                          =>   3.00,
 | 
						|
    'Racket'                       =>   1.50,
 | 
						|
    'Raku'                         =>   4.00,
 | 
						|
    'rally'                        =>   2.00,
 | 
						|
    'ramis ii'                     =>   2.00,
 | 
						|
    'RAML'                         =>   0.90,
 | 
						|
    'ReasonML'                     =>   2.50,
 | 
						|
    'ReScript'                     =>   2.50,
 | 
						|
    'reStructuredText'             =>   1.50,
 | 
						|
    'Razor'                        =>   2.00,
 | 
						|
    'Rexx'                         =>   1.19,
 | 
						|
    'Ring'                         =>   4.20,
 | 
						|
    'RobotFramework'               =>   2.50,
 | 
						|
    'Circom'                       =>   1.00,
 | 
						|
    'Cairo'                        =>   1.00,
 | 
						|
    'Rust'                         =>   1.00,
 | 
						|
    'sas'                          =>   1.95,
 | 
						|
    'Scala'                        =>   4.10,
 | 
						|
    'Scheme'                       =>   1.51,
 | 
						|
    'Slim'                         =>   3.00,
 | 
						|
    'Solidity'                     =>   1.48,
 | 
						|
    'Bourne Shell'                 =>   3.81,
 | 
						|
    'Bourne Again Shell'           =>   3.81,
 | 
						|
    'ksh'                          =>   3.81,
 | 
						|
    'zsh'                          =>   3.81,
 | 
						|
    'Fish Shell'                   =>   3.81,
 | 
						|
    'C Shell'                      =>   3.81,
 | 
						|
    'SaltStack'                    =>   2.00,
 | 
						|
    'SAS'                          =>   1.5 ,
 | 
						|
    'Sass'                         =>   1.5 ,
 | 
						|
    'SCSS'                         =>   1.5 ,
 | 
						|
    'SKILL'                        =>   2.00,
 | 
						|
    'SKILL++'                      =>   2.00,
 | 
						|
    'Slice'                        =>   1.50,
 | 
						|
    'Slint'                        =>   1.00,
 | 
						|
    'Smalltalk'                    =>   4.00,
 | 
						|
    'Smarty'                       =>   3.50,
 | 
						|
    'Softbridge Basic'             =>   2.76,
 | 
						|
    'SparForte'                    =>   3.80,
 | 
						|
    'sps'                          =>   0.25,
 | 
						|
    'spss'                         =>   2.50,
 | 
						|
    'Specman e'                    =>   2.00,
 | 
						|
    'SQL'                          =>   2.29,
 | 
						|
    'Squirrel'                     =>   2.50,
 | 
						|
    'Standard ML'                  =>   3.00,
 | 
						|
    'Stata'                        =>   3.00,
 | 
						|
    'Stylus'                       =>   1.48,
 | 
						|
    'SugarSS'                      =>   2.50,
 | 
						|
    'Svelte'                       =>   2.00,
 | 
						|
    'SVG'                          =>   1.00,
 | 
						|
    'Swift'                        =>   2.50,
 | 
						|
    'SWIG'                         =>   2.50,
 | 
						|
    'TableGen'                     =>   2.00,
 | 
						|
    'Tcl/Tk'                       =>   4.00,
 | 
						|
    'TEAL'                         =>   0.50,
 | 
						|
    'Teamcenter def'               =>   1.00,
 | 
						|
    'Teamcenter met'               =>   1.00,
 | 
						|
    'Teamcenter mth'               =>   1.00,
 | 
						|
    'TeX'                          =>   1.50,
 | 
						|
    'Text'                         =>   0.50,
 | 
						|
    'Thrift'                       =>   2.50,
 | 
						|
    'TLA+'                         =>   1.00,
 | 
						|
    'Titanium Style Sheet'         =>   2.00,
 | 
						|
    'TOML'                         =>   2.76,
 | 
						|
    'Twig'                         =>   2.00,
 | 
						|
    'TNSDL'                        =>   2.00,
 | 
						|
    'TTCN'                         =>   2.00,
 | 
						|
    'TITAN Project File Information' =>   1.90,
 | 
						|
    'TypeScript'                   =>   2.00,
 | 
						|
    'Typst'                        =>   3.00,
 | 
						|
    'Umka'                         =>   2.00,
 | 
						|
    'Unity-Prefab'                 =>   2.50,
 | 
						|
    'Vala'                         =>   1.50,
 | 
						|
    'Vala Header'                  =>   1.40,
 | 
						|
    'Verilog-SystemVerilog'        =>   1.51,
 | 
						|
    'VHDL'                         =>   4.21,
 | 
						|
    'vim script'                   =>   3.00,
 | 
						|
    'Visual Basic'                 =>   2.76,
 | 
						|
    'VB for Applications'          =>   2.76,
 | 
						|
    'Visual Basic .NET'            =>   2.76,
 | 
						|
    'Visual Basic Script'          =>   2.76,
 | 
						|
    'Visual Fox Pro'               =>   4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
 | 
						|
    'Visualforce Component'        =>   1.9 ,
 | 
						|
    'Visualforce Page'             =>   1.9 ,
 | 
						|
    'Vuejs Component'              =>   2.00,
 | 
						|
    'Vyper'                        =>   4.20,
 | 
						|
    'Web Services Description'     =>   1.00,
 | 
						|
    'WebAssembly'                  =>   0.45,
 | 
						|
    'WGSL'                         =>   2.50,
 | 
						|
    'Windows Message File'         =>   1.00,
 | 
						|
    'Windows Resource File'        =>   1.00,
 | 
						|
    'Windows Module Definition'    =>   1.00,
 | 
						|
    'WiX source'                   =>   1.90,
 | 
						|
    'WiX include'                  =>   1.90,
 | 
						|
    'WiX string localization'      =>   1.90,
 | 
						|
    'WXML'                         =>   1.90,
 | 
						|
    'WXSS'                         =>   1.00,
 | 
						|
    'xBase'                        =>   2.00,
 | 
						|
    'xBase Header'                 =>   2.00,
 | 
						|
    'xlisp'                        =>   1.25,
 | 
						|
    'X++'                          =>   1.51, # This is a guess. Copied from C++, because the overhead for C++ headers might be equivalent to the overhead of structuring elements in XPO files
 | 
						|
    'XAML'                         =>   1.90,
 | 
						|
    'XQuery'                       =>   2.50,
 | 
						|
    'yacc'                         =>   1.51,
 | 
						|
    'yacc++'                       =>   1.51,
 | 
						|
    'YAML'                         =>   0.90,
 | 
						|
    'Expect'                       => 2.00,
 | 
						|
    'Gencat NLS'                   => 1.50,
 | 
						|
    'C/C++ Header'                 => 1.00,
 | 
						|
    'inc'                          => 1.00,
 | 
						|
    'lex'                          => 1.00,
 | 
						|
    'Julia'                        => 4.00,
 | 
						|
    'MATLAB'                       => 4.00,
 | 
						|
    'Mathematica'                  => 5.00,
 | 
						|
    'Mercury'                      => 3.00,
 | 
						|
    'Maven/XML'                    => 2.5,
 | 
						|
    'IDL'                          => 3.80,
 | 
						|
    'Octave'                       => 4.00,
 | 
						|
    'ML'                           => 3.00,
 | 
						|
    'Modula3'                      => 2.00,
 | 
						|
    'Mojom'                        => 2.00,
 | 
						|
    'Mojo'                         => 4.20,
 | 
						|
    'PHP'                          => 3.50,
 | 
						|
    'Jupyter Notebook'             => 4.20,
 | 
						|
    'Python'                       => 4.20,
 | 
						|
    'Snakemake'                    => 4.20,
 | 
						|
    'RapydScript'                  => 4.20,
 | 
						|
    'Starlark'                     => 4.20,
 | 
						|
    'BizTalk Pipeline'             => 1.00,
 | 
						|
    'BizTalk Orchestration'        => 1.00,
 | 
						|
    'Cython'                       => 3.80,
 | 
						|
    'Ruby'                         => 4.20,
 | 
						|
    'Ruby HTML'                    => 4.00,
 | 
						|
    'sed'                          => 4.00,
 | 
						|
    'Lua'                          => 4.00,
 | 
						|
    'Luau'                         => 4.50,
 | 
						|
    'OpenCL'                       => 1.50,
 | 
						|
    'Xtend'                        => 2.00,
 | 
						|
    'Zig'                          => 2.50,
 | 
						|
    # aggregates; value is meaningless
 | 
						|
    'C#/Smalltalk'                    => 1.00,
 | 
						|
    'D/dtrace'                        => 1.00,
 | 
						|
    'F#/Forth'                        => 1.00,
 | 
						|
    'Fortran 77/Forth'                => 1.00,
 | 
						|
    'Lisp/Julia'                      => 1.00,
 | 
						|
    'Lisp/OpenCL'                     => 1.00,
 | 
						|
    'PHP/Pascal/Fortran'              => 1.00,
 | 
						|
    'Pascal/Puppet'                   => 1.00,
 | 
						|
    'Perl/Prolog'                     => 1.00,
 | 
						|
    'Raku/Prolog'                     => 1.00,
 | 
						|
    'Verilog-SystemVerilog/Coq'    => 1.00,
 | 
						|
    'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => 1.00,
 | 
						|
    'IDL/Qt Project/Prolog/ProGuard'     => 1.00,
 | 
						|
);
 | 
						|
# 1}}}
 | 
						|
%{$rh_Known_Binary_Archives} = (             # {{{1
 | 
						|
            '.tar'     => 1 ,
 | 
						|
            '.tar.Z'   => 1 ,
 | 
						|
            '.tar.gz'  => 1 ,
 | 
						|
            '.tar.bz2' => 1 ,
 | 
						|
            '.zip'     => 1 ,
 | 
						|
            '.Zip'     => 1 ,
 | 
						|
            '.ZIP'     => 1 ,
 | 
						|
            '.ear'     => 1 ,  # Java
 | 
						|
            '.war'     => 1 ,  # contained within .ear
 | 
						|
            '.xz'      => 1 ,
 | 
						|
            '.whl'     => 1 ,  # Python wheel files (zip)
 | 
						|
            );
 | 
						|
# 1}}}
 | 
						|
} # end sub set_constants()
 | 
						|
sub check_scale_existence {                  # {{{1
 | 
						|
    # do a few sanity checks
 | 
						|
    my ($rhaa_Filters_by_Language,
 | 
						|
        $rh_Language_by_Extension,
 | 
						|
        $rh_Scale_Factor) = @_;
 | 
						|
 | 
						|
    my $OK = 1;
 | 
						|
    foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
 | 
						|
        next if defined $Extension_Collision{$language};
 | 
						|
        if (!defined $rh_Scale_Factor->{$language}) {
 | 
						|
            $OK = 0;
 | 
						|
            warn "Missing scale factor for $language\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my %seen_it = ();
 | 
						|
    foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
 | 
						|
        my $language = $rh_Language_by_Extension->{$ext};
 | 
						|
        next if defined $Extension_Collision{$language};
 | 
						|
        next if $seen_it{$language};
 | 
						|
        if (!$rhaa_Filters_by_Language->{$language}) {
 | 
						|
            $OK = 0;
 | 
						|
            warn "Missing language filter for $language\n";
 | 
						|
        }
 | 
						|
        $seen_it{$language} = 1;
 | 
						|
    }
 | 
						|
    die unless $OK;
 | 
						|
} # 1}}}
 | 
						|
sub pre_post_fix {                           # {{{1
 | 
						|
    # Return the input lines prefixed and postfixed
 | 
						|
    # by the given strings.
 | 
						|
    my ($ra_lines, $prefix, $postfix ) = @_;
 | 
						|
    print "-> pre_post_fix with $prefix, $postfix\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $all_lines = $prefix . join(""  , @{$ra_lines}) . $postfix;
 | 
						|
 | 
						|
    print "<- pre_post_fix\n" if $opt_v > 2;
 | 
						|
    return split("\n", $all_lines);
 | 
						|
} # 1}}}
 | 
						|
sub rm_last_line {                           # {{{1
 | 
						|
    # Return all but the last line.
 | 
						|
    my ($ra_lines, ) = @_;
 | 
						|
    print "-> rm_last_line\n" if $opt_v > 2;
 | 
						|
    print "<- rm_last_line\n" if $opt_v > 2;
 | 
						|
    my $n = scalar(@{$ra_lines}) - 2;
 | 
						|
    return @{$ra_lines}[0..$n];
 | 
						|
} # 1}}}
 | 
						|
sub call_regexp_common {                     # {{{1
 | 
						|
    my ($ra_lines, $language ) = @_;
 | 
						|
    print "-> call_regexp_common for $language\n" if $opt_v > 2;
 | 
						|
 | 
						|
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;
 | 
						|
 | 
						|
    my $all_lines = undef;
 | 
						|
    if ($language eq "C++") { # Regexp::Common's C++ comment regex is multi-line
 | 
						|
#       $all_lines = join("\n", @{$ra_lines});
 | 
						|
        $all_lines = "";
 | 
						|
        foreach (@{$ra_lines}) {
 | 
						|
            if (m/\\$/) {  # line ends with a continuation marker
 | 
						|
                $all_lines .= $_;
 | 
						|
            } else {
 | 
						|
                $all_lines .= "$_\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        $all_lines = join(""  , @{$ra_lines});
 | 
						|
    }
 | 
						|
 | 
						|
    no strict 'vars';
 | 
						|
    # otherwise get:
 | 
						|
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
 | 
						|
    if ($all_lines =~ $RE{comment}{$language}) {
 | 
						|
        # Suppress "Use of uninitialized value in regexp compilation" that
 | 
						|
        # pops up when $1 is undefined--happens if there's a bug in the $RE
 | 
						|
        # This Pascal comment will trigger it:
 | 
						|
        #         (* This is { another } test. **)
 | 
						|
        # Curiously, testing for "defined $1" breaks the substitution.
 | 
						|
        no warnings;
 | 
						|
        # Remove comments.
 | 
						|
        $all_lines =~ s/$1//g;
 | 
						|
    }
 | 
						|
    # a bogus use of %RE to avoid:
 | 
						|
    # Name "main::RE" used only once: possible typo at cloc line xx.
 | 
						|
    print scalar keys %RE if $opt_v < -20;
 | 
						|
    print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | 
						|
    print "<- call_regexp_common\n" if $opt_v > 2;
 | 
						|
    return split("\n", $all_lines);
 | 
						|
} # 1}}}
 | 
						|
sub plural_form {                            # {{{1
 | 
						|
    # For getting the right plural form on some English nouns.
 | 
						|
    my $n = shift @_;
 | 
						|
    if ($n == 1) { return ( 1, "" ); }
 | 
						|
    else         { return ($n, "s"); }
 | 
						|
} # 1}}}
 | 
						|
sub matlab_or_objective_C {                  # {{{1
 | 
						|
    # Decide if code is MATLAB, Mathematica, Objective-C, MUMPS, or Mercury
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rs_language , # out
 | 
						|
       ) = @_;
 | 
						|
    print "-> matlab_or_objective_C\n" if $opt_v > 2;
 | 
						|
    # matlab markers:
 | 
						|
    #   first line starts with "function"
 | 
						|
    #   some lines start with "%"
 | 
						|
    #   high marks for lines that start with [
 | 
						|
    #
 | 
						|
    # Objective-C markers:
 | 
						|
    #   must have at least two brace characters, { }
 | 
						|
    #   has /* ... */ style comments
 | 
						|
    #   some lines start with @
 | 
						|
    #   some lines start with #include
 | 
						|
    #
 | 
						|
    # MUMPS:
 | 
						|
    #   has ; comment markers
 | 
						|
    #   do not match:  \w+\s*=\s*\w
 | 
						|
    #   lines begin with   \s*\.?\w+\s+\w
 | 
						|
    #   high marks for lines that start with \s*K\s+ or \s*Kill\s+
 | 
						|
    #
 | 
						|
    # Mercury:
 | 
						|
    #   any line that begins with :- immediately triggers this
 | 
						|
    #
 | 
						|
    # Mathematica:
 | 
						|
    #   (* .. *)
 | 
						|
    #   BeginPackage
 | 
						|
 | 
						|
    ${$rs_language} = "";
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    my $DEBUG              = 0;
 | 
						|
 | 
						|
    my $matlab_points      = 0;
 | 
						|
    my $mathematica_points = 0;
 | 
						|
    my $objective_C_points = 0;
 | 
						|
    my $mumps_points       = 0;
 | 
						|
    my $mercury_points     = 0;
 | 
						|
    my $has_braces         = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$has_braces if $_ =~ m/[{}]/;
 | 
						|
#print "LINE $. has_braces=$has_braces\n";
 | 
						|
        ++$mumps_points if $. == 1 and m{^[A-Z]};
 | 
						|
        if      (m{^\s*/\*} or m {^\s*//}) {   #   /* or //
 | 
						|
            $objective_C_points += 5;
 | 
						|
            $matlab_points      -= 5;
 | 
						|
printf ".m:  /*|//  obj C=% 2d  matlab=% 2d  mathematica=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^:-\s+}) {      # gotta be mercury
 | 
						|
            $mercury_points = 1000;
 | 
						|
            last;
 | 
						|
        } elsif (m{\w+\s*=\s*\[}) {      # matrix assignment, very matlab
 | 
						|
            $matlab_points += 5;
 | 
						|
        }
 | 
						|
        if (m{\w+\[}) {      # function call by []
 | 
						|
            $mathematica_points += 2;
 | 
						|
printf ".m:  \\w=[   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\s*\w+\s*=\s*}) {    # definitely not MUMPS
 | 
						|
            --$mumps_points;
 | 
						|
printf ".m:  \\w=    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
 | 
						|
            ++$mumps_points;
 | 
						|
printf ".m:  \\w \\w  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\s*;}) {
 | 
						|
            ++$mumps_points;
 | 
						|
printf ".m:  ;      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        }
 | 
						|
        if (m{^\s*#(include|import)}) {
 | 
						|
            # Objective-C without a doubt
 | 
						|
            $objective_C_points = 1000;
 | 
						|
            $matlab_points      = 0;
 | 
						|
printf ".m: #include obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
            $has_braces         = 2;
 | 
						|
            last;
 | 
						|
        } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
 | 
						|
            # Objective-C without a doubt
 | 
						|
            $objective_C_points = 1000;
 | 
						|
            $matlab_points      = 0;
 | 
						|
printf ".m: keyword obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
            last;
 | 
						|
        } elsif (m{^\s*BeginPackage}) {
 | 
						|
            $mathematica_points += 2;
 | 
						|
        } elsif (m{^\s*\[}) {             #   line starts with [  -- very matlab
 | 
						|
            $matlab_points += 5;
 | 
						|
printf ".m:  [      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\sK(ill)?\s+}) {
 | 
						|
            $mumps_points  += 5;
 | 
						|
printf ".m:  Kill   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\s*function}) {
 | 
						|
            --$objective_C_points;
 | 
						|
            ++$matlab_points;
 | 
						|
printf ".m:  funct  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        } elsif (m{^\s*%}) {              #   %
 | 
						|
            # matlab commented line
 | 
						|
            --$objective_C_points;
 | 
						|
            ++$matlab_points;
 | 
						|
printf ".m:  pcent  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
printf "END LOOP    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | 
						|
 | 
						|
    # next heuristic is unreliable for small files
 | 
						|
#   $objective_C_points = -9.9e20 unless $has_braces >= 2;
 | 
						|
 | 
						|
    my %points = ( 'MATLAB'      => $matlab_points     ,
 | 
						|
                   'Mathematica' => $mathematica_points     ,
 | 
						|
                   'MUMPS'       => $mumps_points      ,
 | 
						|
                   'Objective-C' => $objective_C_points,
 | 
						|
                   'Mercury'     => $mercury_points    , );
 | 
						|
 | 
						|
    ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | 
						|
 | 
						|
    print "<- matlab_or_objective_C($file: matlab=$matlab_points, mathematica=$mathematica_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub php_pascal_or_fortran {                  # {{{1
 | 
						|
    # Decide if code is Fortran, PHP, or Pascal
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rs_language , # out
 | 
						|
       ) = @_;
 | 
						|
    print "-> php_pascal_or_fortran\n" if $opt_v > 2;
 | 
						|
    # fortran markers:
 | 
						|
    #   'implicit none' or 'implicit real' or 'implicit integer'
 | 
						|
    #   'program' or 'subroutine' or 'function' or 'module'
 | 
						|
    #   'end program' or 'end subroutine' or 'end function' or 'end module'
 | 
						|
    #   'write(', 'enddo'
 | 
						|
    #
 | 
						|
    # PHP:
 | 
						|
    #   must have at least two brace characters, { }
 | 
						|
    #   has /* ... */ style comments
 | 
						|
    #   some lines start with @
 | 
						|
    #   some lines start with #include
 | 
						|
    #
 | 
						|
    # Pascal:
 | 
						|
    #   lines ending with ;
 | 
						|
    #   writeln
 | 
						|
 | 
						|
    ${$rs_language} = "";
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    my $DEBUG           = 0;
 | 
						|
 | 
						|
    my $fortran_90      = 0;
 | 
						|
    my $fortran_points  = 0;
 | 
						|
    my $php_points      = 0;
 | 
						|
    my $pascal_points   = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        if (/^\s*<\?php/i) {
 | 
						|
            $php_points += 100;
 | 
						|
            last;
 | 
						|
        } elsif (/^\s*end\./i ) {
 | 
						|
            $pascal_points += 100;
 | 
						|
            last;
 | 
						|
        } elsif (/^\s*implicit\s+(none|real|integer|complex|double)/i) {
 | 
						|
            $fortran_points += 100;
 | 
						|
            # don't end here; try to get 77 or 90 flavor
 | 
						|
        } elsif (/^\s*end;/i ) {
 | 
						|
            ++$pascal_points;
 | 
						|
            last;
 | 
						|
        } elsif (/^\s*program\b/i) {
 | 
						|
            ++$fortran_points;
 | 
						|
            ++$pascal_points;
 | 
						|
        # 'function' is common to all three languages so ignore it
 | 
						|
        } elsif (/^\s*(subroutine|module|stop)\b/i or
 | 
						|
                 /^\s*write\s*\(/i                 or
 | 
						|
                 /^\s*end\s+(program|subroutine)/i or
 | 
						|
                 /^\s*end\s*(if|do)\b/i) {
 | 
						|
            ++$fortran_points;
 | 
						|
        } elsif (/^\s*(writeln|begin|procedure|interface|implementation|const|var)\b/i) {
 | 
						|
            ++$pascal_points;
 | 
						|
        } elsif (/^\s*(<|\?>)/) {
 | 
						|
            ++$php_points;
 | 
						|
        } elsif (/;\s*$/) {
 | 
						|
            ++$pascal_points;
 | 
						|
            ++$php_points;
 | 
						|
        }
 | 
						|
        if (/^\s*(integer|real|complex|double|character|logical|public|private).*?::/i or
 | 
						|
            /^\s*(allocatable|dimension)/i or
 | 
						|
            /^\s*(end\s+)?(interface|type|function|module)\b/i) {
 | 
						|
            ++$fortran_90;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    my %points = ( 'Fortran'      => $fortran_points     ,
 | 
						|
                   'PHP'          => $php_points         ,
 | 
						|
                   'Pascal'       => $pascal_points      , );
 | 
						|
 | 
						|
    ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | 
						|
    if (${$rs_language} eq 'Fortran') {
 | 
						|
        if ($fortran_90) {
 | 
						|
            ${$rs_language} = 'Fortran 90';
 | 
						|
        } else {
 | 
						|
            ${$rs_language} = 'Fortran 77';
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- php_pascal_or_fortran($file: fortran=$fortran_points, php=$php_points, pascal=$pascal_points) => ${$rs_language}\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub Lisp_or_OpenCL {                         # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $lisp_points   = 0;
 | 
						|
    my $opcl_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$lisp_points if  /^\s*;/;
 | 
						|
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
 | 
						|
        ++$opcl_points if  /^\s*(int|float|const|{)/;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    # print "lisp_points=$lisp_points   opcl_points=$opcl_points\n";
 | 
						|
    if ($lisp_points > $opcl_points) {
 | 
						|
        $lang = "Lisp";
 | 
						|
    } else {
 | 
						|
        $lang = "OpenCL";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Lisp_or_Julia {                          # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Lisp_or_Julia\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $lisp_points   = 0;
 | 
						|
    my $julia_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$lisp_points if  /^\s*;/;
 | 
						|
        ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
 | 
						|
        ++$julia_points if  /^\s*(function|end|println|for|while)/;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    # print "lisp_points=$lisp_points   julia_points=$julia_points\n";
 | 
						|
    if ($lisp_points > $julia_points) {
 | 
						|
        $lang = "Lisp";
 | 
						|
    } else {
 | 
						|
        $lang = "Julia";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Lisp_or_Julia\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Perl_or_Prolog {                         # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Perl_or_Prolog\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $perl_points = 0;
 | 
						|
    my $prolog_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        next if /^\s*$/;
 | 
						|
        if ($. == 1 and /^#!.*?\bperl/) {
 | 
						|
            $perl_points = 100;
 | 
						|
            last;
 | 
						|
        }
 | 
						|
        ++$perl_points   if  /^=(head|over|item|cut)/;
 | 
						|
        ++$perl_points   if  /;\s*$/;
 | 
						|
        ++$perl_points   if  /(\{|\})/;
 | 
						|
        ++$perl_points   if  /^\s*sub\s+/;
 | 
						|
        ++$perl_points   if  /\s*<<'/;  # start HERE block
 | 
						|
        ++$perl_points   if  /\$(\w+\->|[_!])/;
 | 
						|
        ++$prolog_points if !/\s*#/ and /\.\s*$/;
 | 
						|
        ++$prolog_points if  /:-/;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    # print "perl_points=$perl_points   prolog_points=$prolog_points\n";
 | 
						|
    if ($perl_points > $prolog_points) {
 | 
						|
        $lang = "Perl";
 | 
						|
    } else {
 | 
						|
        $lang = "Prolog";
 | 
						|
    }
 | 
						|
 | 
						|
    printf "<- Perl_or_Prolog(%s, Perl=%d Prolog=%d)\n",
 | 
						|
        $file, $perl_points, $prolog_points if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Raku_or_Prolog {                         # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Raku_or_Prolog\n" if $opt_v > 2;
 | 
						|
    my $lang = Perl_or_Prolog($file, $rh_Err, $raa_errors);
 | 
						|
    $lang = "Raku" if $lang eq "Perl";
 | 
						|
 | 
						|
    print "<- Raku_or_Prolog\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub IDL_or_QtProject {                       # {{{1
 | 
						|
    # IDL, QtProject, Prolog, or ProGuard
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $idl_points      = 0;
 | 
						|
    my $qtproj_points   = 0;
 | 
						|
    my $prolog_points   = 0;
 | 
						|
    my $proguard_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$idl_points      if /^\s*;/;
 | 
						|
        ++$idl_points      if /plot\(/i;
 | 
						|
        ++$qtproj_points   if /^\s*(qt|configs|sources|template|target|targetpath|subdirs)\b/i;
 | 
						|
        ++$qtproj_points   if /qthavemodule/i;
 | 
						|
        ++$prolog_points   if /\.\s*$/;
 | 
						|
        ++$prolog_points   if /:-/;
 | 
						|
        ++$proguard_points if /^\s*#/;
 | 
						|
        ++$proguard_points if /^-keep/;
 | 
						|
        ++$proguard_points if /^-(dont)?obfuscate/;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    # print "idl_points=$idl_points   qtproj_points=$qtproj_points\n";
 | 
						|
 | 
						|
    my %points = ( 'IDL'        => $idl_points       ,
 | 
						|
                   'Qt Project' => $qtproj_points    ,
 | 
						|
                   'Prolog'     => $prolog_points    ,
 | 
						|
                   'ProGuard'   => $proguard_points  ,
 | 
						|
                 );
 | 
						|
 | 
						|
    $lang = (sort { $points{$b} <=> $points{$a} or $a cmp $b} keys %points)[0];
 | 
						|
 | 
						|
    print "<- IDL_or_QtProject(idl_points=$idl_points, ",
 | 
						|
          "qtproj_points=$qtproj_points, prolog_points=$prolog_points, ",
 | 
						|
          "proguard_points=$proguard_points)\n"
 | 
						|
           if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Ant_or_XML {                             # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Ant_or_XML($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = "XML";
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $Ant_points   = 0;
 | 
						|
    my $XML_points   = 1;
 | 
						|
    while (<$IN>) {
 | 
						|
        if (/^\s*<project\s+/) {
 | 
						|
            ++$Ant_points  ;
 | 
						|
            --$XML_points  ;
 | 
						|
        }
 | 
						|
        if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
 | 
						|
            ++$Ant_points  ;
 | 
						|
            --$XML_points  ;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    if ($XML_points >= $Ant_points) {
 | 
						|
        # tie or better goes to XML
 | 
						|
        $lang = "XML";
 | 
						|
    } else {
 | 
						|
        $lang = "Ant";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Maven_or_XML {                           # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Maven_or_XML($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = "XML";
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $Mvn_points   = 0;
 | 
						|
    my $XML_points   = 1;
 | 
						|
    while (<$IN>) {
 | 
						|
        if (/^\s*<project\s+/) {
 | 
						|
            ++$Mvn_points  ;
 | 
						|
            --$XML_points  ;
 | 
						|
        }
 | 
						|
        if (m{xmlns="http://maven.apache.org/POM/}) {
 | 
						|
            ++$Mvn_points  ;
 | 
						|
            --$XML_points  ;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    if ($XML_points >= $Mvn_points) {
 | 
						|
        # tie or better goes to XML
 | 
						|
        $lang = "XML";
 | 
						|
    } else {
 | 
						|
        $lang = "Maven";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub pascal_or_puppet {                       # {{{1
 | 
						|
    # Decide if code is Pascal or Puppet manifest
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rs_language , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> pascal_or_puppet\n" if $opt_v > 2;
 | 
						|
 | 
						|
    ${$rs_language} = "";
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    my $DEBUG              = 0;
 | 
						|
    my $pascal_points      = 0;
 | 
						|
    my $puppet_points      = 0;
 | 
						|
 | 
						|
    while (<$IN>) {
 | 
						|
 | 
						|
        if ( /^\s*\#\s+/ ) {
 | 
						|
                $puppet_points += .001;
 | 
						|
                next;
 | 
						|
        }
 | 
						|
 | 
						|
        ++$pascal_points if /\bprogram\s+[A-Za-z]/i;
 | 
						|
        ++$pascal_points if /\bunit\s+[A-Za-z]/i;
 | 
						|
        ++$pascal_points if /\bmodule\s+[A-Za-z]/i;
 | 
						|
        ++$pascal_points if /\bprocedure\b/i;
 | 
						|
        ++$pascal_points if /\bfunction\b/i;
 | 
						|
        ++$pascal_points if /^\s*interface\s+/i;
 | 
						|
        ++$pascal_points if /^\s*implementation\s+/i;
 | 
						|
        ++$pascal_points if /^\s*uses\s+/i;
 | 
						|
        ++$pascal_points if /(?<!\:\:)\bbegin\b(?!\:\:)/i;
 | 
						|
        ++$pascal_points if /(?<!\:\:)\bend\b(?!\:\:)/i;
 | 
						|
        ++$pascal_points if /\:\=/;
 | 
						|
        ++$pascal_points if /\<\>/;
 | 
						|
        ++$pascal_points if /^\s*\{\$(I|INCLUDE)\s+.*\}/i;
 | 
						|
        ++$pascal_points if /writeln/;
 | 
						|
 | 
						|
        ++$puppet_points if /^\s*class\s+/ and not /class\s+operator\s+/;
 | 
						|
        ++$puppet_points if /^\s*function\s+[a-z][a-z0-9]+::[a-z][a-z0-9]+\s*/;
 | 
						|
        ++$puppet_points if /^\s*type\s+[A-Z]\w+::[A-Z]\w+\s+/;
 | 
						|
        ++$puppet_points if /^\s*case\s+/;
 | 
						|
        ++$puppet_points if /^\s*package\s+/;
 | 
						|
        ++$puppet_points if /^\s*file\s+/;
 | 
						|
        ++$puppet_points if /^\s*include\s\w+/;
 | 
						|
        ++$puppet_points if /^\s*service\s+/;
 | 
						|
        ++$puppet_points if /\s\$\w+\s*\=\s*\S/;
 | 
						|
        ++$puppet_points if /\S\s*\=\>\s*\S/;
 | 
						|
 | 
						|
        # No need to process rest of file if language seems obvious.
 | 
						|
        last
 | 
						|
                if (abs ($pascal_points - $puppet_points ) > 20 );
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
 | 
						|
        if $opt_v > 2;
 | 
						|
 | 
						|
    if ($pascal_points > $puppet_points) {
 | 
						|
        ${$rs_language} = "Pascal";
 | 
						|
    } else {
 | 
						|
        ${$rs_language} = "Puppet";
 | 
						|
    }
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub Forth_or_Fortran {                       # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Forth_or_Fortran\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $forth_points = 0;
 | 
						|
    my $fortran_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$forth_points if  /^:\s/;
 | 
						|
        ++$fortran_points if  /^([c*][^a-z]|\s{6,}(subroutine|program|end|implicit)\s|\s*!)/i;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($forth_points > $fortran_points) {
 | 
						|
        $lang = "Forth";
 | 
						|
    } else {
 | 
						|
        $lang = "Fortran 77";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Forth_or_Fortran\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Forth_or_Fsharp {                        # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Forth_or_Fsharp\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $forth_points = 0;
 | 
						|
    my $fsharp_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$forth_points if  /^:\s/;
 | 
						|
        ++$fsharp_points if  /^\s*(#light|import|let|module|namespace|open|type)/;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($forth_points > $fsharp_points) {
 | 
						|
        $lang = "Forth";
 | 
						|
    } else {
 | 
						|
        $lang = "F#";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Forth_or_Fsharp\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Verilog_or_Coq {                         # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Verilog_or_Coq\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $coq_points     = 0;
 | 
						|
    my $verilog_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$verilog_points if  /^\s*(module|begin|input|output|always)/;
 | 
						|
        ++$coq_points if /\b(Inductive|Fixpoint|Definition|
 | 
						|
                             Theorem|Lemma|Proof|Qed|forall|
 | 
						|
                             Section|Check|Notation|Variable|
 | 
						|
                             Goal|Fail|Require|Scheme|Module|Ltac|
 | 
						|
                             Set|Unset|Parameter|Coercion|Axiom|
 | 
						|
                             Locate|Type|Record|Existing|Class)\b/x;
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($coq_points > $verilog_points) {
 | 
						|
        $lang = "Coq";
 | 
						|
    } else {
 | 
						|
        $lang = "Verilog-SystemVerilog";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- Verilog_or_Coq\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub TypeScript_or_QtLinguist {               # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> TypeScript_or_QtLinguist\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $tscript_points  = 0;
 | 
						|
    my $linguist_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        ++$linguist_points if m{\b</?(message|source|translation)>};
 | 
						|
        ++$tscript_points  if /^\s*(var|const|let|class|document)\b/;
 | 
						|
        ++$tscript_points  if /[;}]\s*$/;
 | 
						|
        ++$tscript_points  if m{^\s*//};
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($tscript_points >= $linguist_points) {
 | 
						|
        $lang = "TypeScript";
 | 
						|
    } else {
 | 
						|
        $lang = "Qt Linguist";
 | 
						|
    }
 | 
						|
    print "<- TypeScript_or_QtLinguist\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Qt_or_Glade {                            # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Qt_or_Glade\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $qt_points    =  1;
 | 
						|
    my $glade_points = -1;
 | 
						|
    while (<$IN>) {
 | 
						|
        if (/generated\s+with\s+glade/i) {
 | 
						|
            $glade_points =  1;
 | 
						|
            $qt_points    = -1;
 | 
						|
            last;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($glade_points > $qt_points) {
 | 
						|
        $lang = "Glade";
 | 
						|
    } else {
 | 
						|
        $lang = "Qt";
 | 
						|
    }
 | 
						|
    print "<- Qt_or_Glade\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Csharp_or_Smalltalk {                    # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Csharp_or_Smalltalk($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $cs_points        = 0;
 | 
						|
    my $smalltalk_points = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        s{//.*?$}{};        # strip inline C# comments for better clarity
 | 
						|
        next if /^\s*$/;
 | 
						|
        if (/[;}{]\s*$/) {
 | 
						|
            ++$cs_points       ;
 | 
						|
        } elsif (/^(using|namespace)\s/) {
 | 
						|
            $cs_points += 20;
 | 
						|
        } elsif (/^\s*(public|private|new)\s/) {
 | 
						|
            $cs_points += 20;
 | 
						|
        } elsif (/^\s*\[assembly:/) {
 | 
						|
            ++$cs_points       ;
 | 
						|
        }
 | 
						|
        if (/(\!|\]\.)\s*$/) {
 | 
						|
            ++$smalltalk_points;
 | 
						|
            --$cs_points       ;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
    if ($smalltalk_points > $cs_points) {
 | 
						|
        $lang = "Smalltalk";
 | 
						|
    } else {
 | 
						|
        $lang = "C#";
 | 
						|
    }
 | 
						|
    print "<- Csharp_or_Smalltalk($file)=$lang\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Visual_Basic_or_TeX_or_Apex {            # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
        $rs_language , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Visual_Basic_or_TeX_or_Apex($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $VB_points        = 0;
 | 
						|
    my $tex_points       = 0;
 | 
						|
    my $apex_points      = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        next if /^\s*$/;
 | 
						|
#print "$_";
 | 
						|
        if (/\s*%/ or /\s*\\/) {
 | 
						|
            ++$tex_points   ;
 | 
						|
        } else {
 | 
						|
            if (/^\s*(public|private)\s/i) {
 | 
						|
                ++$VB_points    ;
 | 
						|
                ++$apex_points  ;
 | 
						|
#print "+VB1 +A1";
 | 
						|
            } elsif (/^\s*(end|attribute|version)\s/i) {
 | 
						|
                ++$VB_points    ;
 | 
						|
#print "+VB2";
 | 
						|
            }
 | 
						|
            if (/[{}]/ or /;\s*$/) {
 | 
						|
                ++$apex_points  ;
 | 
						|
#print "+A2";
 | 
						|
            }
 | 
						|
#print "\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    my %points = ( 'Visual Basic'   => $VB_points   ,
 | 
						|
                   'TeX'            => $tex_points  ,
 | 
						|
                   'Apex Class'     => $apex_points ,);
 | 
						|
 | 
						|
    ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | 
						|
 | 
						|
    print "<- Visual_Basic_or_TeX_or_Apex($file: VB=$VB_points, TeX=$tex_points, Apex=$apex_points\n" if $opt_v > 2;
 | 
						|
    return $lang;
 | 
						|
} # 1}}}
 | 
						|
sub Scheme_or_SaltStack {                    # {{{1
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> Scheme_or_SaltStack($file)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $lang = undef;
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | 
						|
        return $lang;
 | 
						|
    }
 | 
						|
    my $Sch_points = 0;
 | 
						|
    my $SS_points  = 0;
 | 
						|
    while (<$IN>) {
 | 
						|
        next if /^\s*$/;
 | 
						|
        if (/{\%.*%}/) {
 | 
						|
            $SS_points += 5;
 | 
						|
        } elsif (/map\.jinja\b/) {
 | 
						|
            $SS_points += 5;
 | 
						|
        } elsif (/\((define|lambda|let|cond|do)\s/) {
 | 
						|
            $Sch_points += 1;
 | 
						|
        } else {
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    print "<- Scheme_or_SaltStack($file: Scheme=$Sch_points, SaltStack=$SS_points\n" if $opt_v > 2;
 | 
						|
    if ($Sch_points > $SS_points) {
 | 
						|
        return "Scheme";
 | 
						|
    } else {
 | 
						|
        return "SaltStack";
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub html_colored_text {                      # {{{1
 | 
						|
    # http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
 | 
						|
    my ($color, $text) = @_;
 | 
						|
#?#die "html_colored_text($text)";
 | 
						|
    if      ($color =~ /^red$/i)   {
 | 
						|
        $color = "#ff0000";
 | 
						|
    } elsif ($color =~ /^green$/i) {
 | 
						|
        $color = "#00ff00";
 | 
						|
    } elsif ($color =~ /^blue$/i)  {
 | 
						|
        $color = "#0000ff";
 | 
						|
    } elsif ($color =~ /^grey$/i)  {
 | 
						|
        $color = "#cccccc";
 | 
						|
    }
 | 
						|
#   return "" unless $text;
 | 
						|
    return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
 | 
						|
} # 1}}}
 | 
						|
sub xml_metachars {                          # {{{1
 | 
						|
    # http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
 | 
						|
    my ($string, ) = shift @_;
 | 
						|
 | 
						|
    my  @in_chars    = split(//, $string);
 | 
						|
    my  @out_chars   = ();
 | 
						|
    foreach my $c (@in_chars) {
 | 
						|
        if      ($c eq '&') { push @out_chars, '&'
 | 
						|
        } elsif ($c eq '<') { push @out_chars, '<'
 | 
						|
        } elsif ($c eq '>') { push @out_chars, '>'
 | 
						|
        } elsif ($c eq '"') { push @out_chars, '"'
 | 
						|
        } elsif ($c eq "'") { push @out_chars, '''
 | 
						|
        } else {
 | 
						|
            push @out_chars, $c;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return join "", @out_chars;
 | 
						|
} # 1}}}
 | 
						|
sub html_metachars {                         # {{{1
 | 
						|
    # Replace HTML metacharacters with their printable forms.
 | 
						|
    # Future:  use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
 | 
						|
    # from Fabiano Reese Righetti's HTML::Encoder module if
 | 
						|
    # this subroutine proves to be too simplistic.
 | 
						|
    my ($string, ) = shift @_;
 | 
						|
 | 
						|
    my  @in_chars    = split(//, $string);
 | 
						|
    my  @out_chars   = ();
 | 
						|
    foreach my $c (@in_chars) {
 | 
						|
        if      ($c eq '<') {
 | 
						|
            push @out_chars, '<'
 | 
						|
        } elsif ($c eq '>') {
 | 
						|
            push @out_chars, '>'
 | 
						|
        } elsif ($c eq '&') {
 | 
						|
            push @out_chars, '&'
 | 
						|
        } else {
 | 
						|
            push @out_chars, $c;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return join "", @out_chars;
 | 
						|
} # 1}}}
 | 
						|
sub test_alg_diff {                          # {{{1
 | 
						|
    my ($file_1 ,
 | 
						|
        $file_2 )
 | 
						|
       = @_;
 | 
						|
    my $fh_1 = open_file('<', $file_1, 1);
 | 
						|
    die "Unable to read $file_1:  $!\n" unless defined $fh_1;
 | 
						|
    chomp(my @lines_1 = <$fh_1>);
 | 
						|
    $fh_1->close;
 | 
						|
 | 
						|
    my $fh_2 = open_file('<', $file_2, 1);
 | 
						|
    die "Unable to read $file_2:  $!\n" unless defined $fh_2;
 | 
						|
    chomp(my @lines_2 = <$fh_2>);
 | 
						|
    $fh_2->close;
 | 
						|
 | 
						|
    my $n_no_change = 0;
 | 
						|
    my $n_modified  = 0;
 | 
						|
    my $n_added     = 0;
 | 
						|
    my $n_deleted   = 0;
 | 
						|
    my @min_sdiff   = ();
 | 
						|
my $NN = chr(27) . "[0m";  # normal
 | 
						|
my $BB = chr(27) . "[1m";  # bold
 | 
						|
 | 
						|
    my @sdiffs = sdiff( \@lines_1, \@lines_2 );
 | 
						|
    foreach my $entry (@sdiffs) {
 | 
						|
        my ($out_1, $out_2) = ('', '');
 | 
						|
        if ($entry->[0] eq 'u') {
 | 
						|
            ++$n_no_change;
 | 
						|
          # $out_1 = $entry->[1];
 | 
						|
          # $out_2 = $entry->[2];
 | 
						|
            next;
 | 
						|
        }
 | 
						|
#       push @min_sdiff, $entry;
 | 
						|
        if      ($entry->[0] eq 'c') {
 | 
						|
            ++$n_modified;
 | 
						|
            ($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
 | 
						|
            $out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
 | 
						|
            $out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
 | 
						|
          # $out_1 =~ s/\cA//g;
 | 
						|
          # $out_2 =~ s/\cA//g;
 | 
						|
        } elsif ($entry->[0] eq '+') {
 | 
						|
            ++$n_added;
 | 
						|
            $out_1 = $entry->[1];
 | 
						|
            $out_2 = $entry->[2];
 | 
						|
        } elsif ($entry->[0] eq '-') {
 | 
						|
            ++$n_deleted;
 | 
						|
            $out_1 = $entry->[1];
 | 
						|
            $out_2 = $entry->[2];
 | 
						|
        } elsif ($entry->[0] eq 'u') {
 | 
						|
        } else { die "unknown entry->[0]=[$entry->[0]]\n"; }
 | 
						|
        printf "%-80s | %s\n", $out_1, $out_2;
 | 
						|
    }
 | 
						|
 | 
						|
#   foreach my $entry (@min_sdiff) {
 | 
						|
#       printf "DIFF:  %s  %s  %s\n", @{$entry};
 | 
						|
#   }
 | 
						|
} # 1}}}
 | 
						|
sub write_comments_to_html {                 # {{{1
 | 
						|
    my ($filename      , # in
 | 
						|
        $rah_diff_L    , # in  see routine array_diff() for explanation
 | 
						|
        $rah_diff_R    , # in  see routine array_diff() for explanation
 | 
						|
        $rh_blank      , # in  location and counts of blank lines
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
 | 
						|
    my $file = $filename . ".html";
 | 
						|
 | 
						|
    my $approx_line_count = scalar @{$rah_diff_L};
 | 
						|
       $approx_line_count = 1 unless $approx_line_count;
 | 
						|
    my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
 | 
						|
 | 
						|
    my $html_out = html_header($filename);
 | 
						|
 | 
						|
    my $comment_line_number = 0;
 | 
						|
    for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
 | 
						|
        if (defined $rh_blank->{$i}) {
 | 
						|
            foreach (1..$rh_blank->{$i}) {
 | 
						|
                $html_out .= "<!-- blank -->\n";
 | 
						|
            }
 | 
						|
        }
 | 
						|
        my $line_num = "";
 | 
						|
        my $pre      = "";
 | 
						|
        my $post     = '</span>  ';
 | 
						|
warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
 | 
						|
        if ($rah_diff_R->[$i]{type} eq 'nonexist') {
 | 
						|
            ++$comment_line_number;
 | 
						|
            $line_num = sprintf "\  <span class=\"clinenum\"> %0${n_digits}d %s",
 | 
						|
                            $comment_line_number, $post;
 | 
						|
            $pre = '<span class="comment">';
 | 
						|
            $html_out .= $line_num;
 | 
						|
            $html_out .= $pre .
 | 
						|
                         html_metachars($rah_diff_L->[$i]{char}) .
 | 
						|
                         $post . "\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if      ($rah_diff_R->[$i]{type} eq 'code' and
 | 
						|
                 $rah_diff_R->[$i]{desc} eq 'same') {
 | 
						|
            # entire line remains as-is
 | 
						|
            $line_num = sprintf "\  <span class=\"linenum\"> %0${n_digits}d %s",
 | 
						|
                            $rah_diff_R->[$i]{lnum}, $post;
 | 
						|
            $pre    = '<span class="normal">';
 | 
						|
            $html_out .= $line_num;
 | 
						|
            $html_out .= $pre .
 | 
						|
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
 | 
						|
#XX     } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
 | 
						|
#XX
 | 
						|
#XX         $line_num = '<span class="linenum">' .
 | 
						|
#XX                      $rah_diff_R->[$i]{lnum} . $post;
 | 
						|
#XX         $html_out .= $line_num;
 | 
						|
#XX
 | 
						|
#XX         my @strings = @{$rah_diff_R->[$i]{char}{strings}};
 | 
						|
#XX         my @type    = @{$rah_diff_R->[$i]{char}{type}};
 | 
						|
#XX         for (my $i = 0; $i < scalar @strings; $i++) {
 | 
						|
#XX             if ($type[$i] eq 'u') {
 | 
						|
#XX                 $pre = '<span class="normal">';
 | 
						|
#XX             } else {
 | 
						|
#XX                 $pre = '<span class="comment">';
 | 
						|
#XX             }
 | 
						|
#XX             $html_out .= $pre .  html_metachars($strings[$i]) . $post;
 | 
						|
#XX         }
 | 
						|
# print Dumper(@strings, @type); die;
 | 
						|
 | 
						|
        } elsif ($rah_diff_R->[$i]{type} eq 'comment') {
 | 
						|
            $line_num = '<span class="clinenum">' . $comment_line_number . $post;
 | 
						|
            # entire line is a comment
 | 
						|
            $pre    = '<span class="comment">';
 | 
						|
            $html_out .= $pre .
 | 
						|
                         html_metachars($rah_diff_R->[$i]{char}) . $post;
 | 
						|
        }
 | 
						|
#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
 | 
						|
        $html_out .= "\n";
 | 
						|
    }
 | 
						|
 | 
						|
    $html_out .= html_end();
 | 
						|
 | 
						|
    my $out_file = "$filename.html";
 | 
						|
    write_file($out_file, {}, ( $html_out ) );
 | 
						|
 | 
						|
    print "<- write_comments_to_html\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub array_diff {                             # {{{1
 | 
						|
    my ($file          , # in  only used for error reporting
 | 
						|
        $ra_lines_L    , # in  array of lines in Left  file (no blank lines)
 | 
						|
        $ra_lines_R    , # in  array of lines in Right file (no blank lines)
 | 
						|
        $mode          , # in  "comment" | "revision"
 | 
						|
        $rah_diff_L    , # out
 | 
						|
        $rah_diff_R    , # out
 | 
						|
        $raa_Errors    , # in/out
 | 
						|
       ) = @_;
 | 
						|
 | 
						|
    # This routine operates in two ways:
 | 
						|
    # A. Computes diffs of the same file with and without comments.
 | 
						|
    #    This is used to classify lines as code, comments, or blank.
 | 
						|
    # B. Computes diffs of two revisions of a file.  This method
 | 
						|
    #    requires a prior run of method A using the older version
 | 
						|
    #    of the file because it needs lines to be classified.
 | 
						|
 | 
						|
    # $rah_diff structure:
 | 
						|
    # An array with n entries where n equals the number of lines in
 | 
						|
    # an sdiff of the two files.  Each entry in the array describes
 | 
						|
    # the contents of the corresponding line in file Left and file Right:
 | 
						|
    #  diff[]{type} = blank | code | code+comment | comment | nonexist
 | 
						|
    #        {lnum} = line number within the original file (1-based)
 | 
						|
    #        {desc} = same | added | removed | modified
 | 
						|
    #        {char} = the input line unless {desc} = 'modified' in
 | 
						|
    #                 which case
 | 
						|
    #        {char}{strings} = [ substrings ]
 | 
						|
    #        {char}{type}    = [ disposition (added, removed, etc)]
 | 
						|
    #
 | 
						|
 | 
						|
    @{$rah_diff_L} = ();
 | 
						|
    @{$rah_diff_R} = ();
 | 
						|
 | 
						|
    print "-> array_diff()\n" if $opt_v > 2;
 | 
						|
    my $COMMENT_MODE = 0;
 | 
						|
       $COMMENT_MODE = 1 if $mode eq "comment";
 | 
						|
 | 
						|
#print "array_diff(mode=$mode)\n";
 | 
						|
#print Dumper("block left:" , $ra_lines_L);
 | 
						|
#print Dumper("block right:", $ra_lines_R);
 | 
						|
 | 
						|
    my @sdiffs = ();
 | 
						|
    eval {
 | 
						|
        local $SIG{ALRM} = sub { die "alarm\n" };
 | 
						|
        alarm $opt_diff_timeout;
 | 
						|
        @sdiffs = sdiff($ra_lines_L, $ra_lines_R);
 | 
						|
        alarm 0;
 | 
						|
    };
 | 
						|
    if ($@) {
 | 
						|
        # timed out
 | 
						|
        die unless $@ eq "alarm\n"; # propagate unexpected errors
 | 
						|
        push @{$raa_Errors},
 | 
						|
             [ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
 | 
						|
        if ($opt_v) {
 | 
						|
          warn "array_diff: diff timeout failure for $file--ignoring\n";
 | 
						|
        }
 | 
						|
        return;
 | 
						|
    }
 | 
						|
 | 
						|
    my $n_L        = 0;
 | 
						|
    my $n_R        = 0;
 | 
						|
    my $n_sdiff    = 0;  # index to $rah_diff_L, $rah_diff_R
 | 
						|
    foreach my $triple (@sdiffs) {
 | 
						|
        my $flag   = $triple->[0];
 | 
						|
        my $line_L = $triple->[1];
 | 
						|
        my $line_R = $triple->[2];
 | 
						|
        $rah_diff_L->[$n_sdiff]{char} = $line_L;
 | 
						|
        $rah_diff_R->[$n_sdiff]{char} = $line_R;
 | 
						|
        if      ($flag eq 'u') {  # u = unchanged
 | 
						|
            ++$n_L;
 | 
						|
            ++$n_R;
 | 
						|
            if ($COMMENT_MODE) {
 | 
						|
                # line exists in both with & without comments, must be code
 | 
						|
                $rah_diff_L->[$n_sdiff]{type} = "code";
 | 
						|
                $rah_diff_R->[$n_sdiff]{type} = "code";
 | 
						|
            }
 | 
						|
            $rah_diff_L->[$n_sdiff]{desc} = "same";
 | 
						|
            $rah_diff_R->[$n_sdiff]{desc} = "same";
 | 
						|
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | 
						|
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | 
						|
        } elsif ($flag eq 'c') {  # c = changed
 | 
						|
# warn "per line sdiff() commented out\n"; if (0) {
 | 
						|
            ++$n_L;
 | 
						|
            ++$n_R;
 | 
						|
 | 
						|
            if ($COMMENT_MODE) {
 | 
						|
                # line has text both with & without comments;
 | 
						|
                # count as code
 | 
						|
                $rah_diff_L->[$n_sdiff]{type} = "code";
 | 
						|
                $rah_diff_R->[$n_sdiff]{type} = "code";
 | 
						|
            }
 | 
						|
 | 
						|
            my @chars_L = split '', $line_L;
 | 
						|
            my @chars_R = split '', $line_R;
 | 
						|
 | 
						|
            $rah_diff_L->[$n_sdiff]{desc} = "modified";
 | 
						|
            $rah_diff_R->[$n_sdiff]{desc} = "modified";
 | 
						|
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | 
						|
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | 
						|
 | 
						|
        } elsif ($flag eq '+') {  # + = added
 | 
						|
            ++$n_R;
 | 
						|
            if ($COMMENT_MODE) {
 | 
						|
                # should never get here, but may due to sdiff() bug,
 | 
						|
                # ref https://rt.cpan.org/Public/Bug/Display.html?id=131629
 | 
						|
                # Rather than failing, ignore and continue.  A possible
 | 
						|
                # consequence is counts may be inconsistent.
 | 
						|
#####           @{$rah_diff_L} = ();
 | 
						|
#####           @{$rah_diff_R} = ();
 | 
						|
#####           push @{$raa_Errors},
 | 
						|
#####                [ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
 | 
						|
                if ($opt_v) {
 | 
						|
                  warn "array_diff: diff failure (diff says the\n";
 | 
						|
                  warn "comment-free file has added lines).\n";
 | 
						|
                  warn "$n_sdiff  $line_L\n";
 | 
						|
                }
 | 
						|
            }
 | 
						|
####        $rah_diff_L->[$n_sdiff]{type} = "nonexist";
 | 
						|
            $rah_diff_L->[$n_sdiff]{type} = "comment";
 | 
						|
            $rah_diff_L->[$n_sdiff]{desc} = "removed";
 | 
						|
            $rah_diff_R->[$n_sdiff]{desc} = "added";
 | 
						|
            $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | 
						|
        } elsif ($flag eq '-') {  # - = removed
 | 
						|
            ++$n_L;
 | 
						|
            if ($COMMENT_MODE) {
 | 
						|
                # line must be comment because blanks already gone
 | 
						|
                $rah_diff_L->[$n_sdiff]{type} = "comment";
 | 
						|
            }
 | 
						|
            $rah_diff_R->[$n_sdiff]{type} = "nonexist";
 | 
						|
            $rah_diff_R->[$n_sdiff]{desc} = "removed";
 | 
						|
            $rah_diff_L->[$n_sdiff]{desc} = "added";
 | 
						|
            $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | 
						|
        }
 | 
						|
#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
 | 
						|
        ++$n_sdiff;
 | 
						|
    }
 | 
						|
#use Data::Dumper::Simple;
 | 
						|
#print Dumper($rah_diff_L, $rah_diff_R);
 | 
						|
#print Dumper($rah_diff_L);
 | 
						|
 | 
						|
    print "<- array_diff\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub remove_leading_dir {                     # {{{1
 | 
						|
    my @filenames = @_;
 | 
						|
    #
 | 
						|
    #  Input should be a list of file names
 | 
						|
    #  with the same leading directory such as
 | 
						|
    #
 | 
						|
    #      dir1/dir2/a.txt
 | 
						|
    #      dir1/dir2/b.txt
 | 
						|
    #      dir1/dir2/dir3/c.txt
 | 
						|
    #
 | 
						|
    #  Output is the same list minus the common
 | 
						|
    #  directory path:
 | 
						|
    #
 | 
						|
    #      a.txt
 | 
						|
    #      b.txt
 | 
						|
    #      dir3/c.txt
 | 
						|
    #
 | 
						|
    print "-> remove_leading_dir()\n" if $opt_v > 2;
 | 
						|
    my @D = (); # a matrix:   [ [ dir1, dir2 ],         # dir1/dir2/a.txt
 | 
						|
                #               [ dir1, dir2 ],         # dir1/dir2/b.txt
 | 
						|
                #               [ dir1, dir2 , dir3] ]  # dir1/dir2/dir3/c.txt
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
        foreach my $F (@filenames) {
 | 
						|
            $F =~ s{\\}{/}g;
 | 
						|
            $F = ucfirst($F) if $F =~ /^\w:/;  # uppercase drive letter
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if (scalar @filenames == 1) {
 | 
						|
        # special case:  with only one filename
 | 
						|
        # cannot determine a baseline, just remove first directory level
 | 
						|
        $filenames[0] =~ s{^.*?/}{};
 | 
						|
        # print "-> $filenames[0]\n";
 | 
						|
        return $filenames[0];
 | 
						|
    }
 | 
						|
    foreach my $F (@filenames) {
 | 
						|
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
 | 
						|
        my @x = File::Spec->splitdir( $Dir );
 | 
						|
        pop @x unless $x[$#x]; # last entry usually null, remove it
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            if (defined($Vol) and $Vol) {
 | 
						|
                # put the drive letter, eg, C:, at the front
 | 
						|
                unshift @x, uc $Vol;
 | 
						|
            }
 | 
						|
        }
 | 
						|
#print "F=$F, Dir=$Dir  x=[", join("][", @x), "]\n";
 | 
						|
        push @D, [ @x ];
 | 
						|
    }
 | 
						|
 | 
						|
    # now loop over columns until either they are all
 | 
						|
    # eliminated or a unique column is found
 | 
						|
 | 
						|
    my @common   = ();  # to contain the common leading directories
 | 
						|
    my $mismatch = 0;
 | 
						|
    while (!$mismatch) {
 | 
						|
        for (my $row = 1; $row < scalar @D; $row++) {
 | 
						|
#print "comparing $D[$row][0] to $D[0][0]\n";
 | 
						|
 | 
						|
            if (!defined $D[$row][0] or !defined $D[0][0] or
 | 
						|
                ($D[$row][0] ne $D[0][0])) {
 | 
						|
                $mismatch = 1;
 | 
						|
                last;
 | 
						|
            }
 | 
						|
        }
 | 
						|
#print "mismatch=$mismatch\n";
 | 
						|
        if (!$mismatch) {
 | 
						|
            push @common, $D[0][0];
 | 
						|
            # all terms in the leading match; unshift the batch
 | 
						|
            foreach my $ra (@D) {
 | 
						|
                shift @{$ra};
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    push @common, " ";  # so that $leading will end with "/ "
 | 
						|
    my $leading = File::Spec->catdir( @common );
 | 
						|
       $leading =~ s{ $}{};  # now take back the bogus appended space
 | 
						|
#print "remove_leading_dir leading=[$leading]\n"; die;
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
       $leading =~ s{\\}{/}g;
 | 
						|
    }
 | 
						|
    foreach my $F (@filenames) {
 | 
						|
        $F =~ s{^$leading}{};
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- remove_leading_dir()\n" if $opt_v > 2;
 | 
						|
    return @filenames;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub strip_leading_dir {                      # {{{1
 | 
						|
    my ($leading, @filenames) = @_;
 | 
						|
    #  removes the string $leading from each entry in @filenames
 | 
						|
    print "-> strip_leading_dir()\n" if $opt_v > 2;
 | 
						|
 | 
						|
#print "remove_leading_dir leading=[$leading]\n"; die;
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
       $leading =~ s{\\}{/}g;
 | 
						|
        foreach my $F (@filenames) {
 | 
						|
            $F =~ s{\\}{/}g;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    foreach my $F (@filenames) {
 | 
						|
#print "strip_leading_dir F before $F\n";
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            $F =~ s{^$leading}{}i;
 | 
						|
        } else {
 | 
						|
            $F =~ s{^$leading}{};
 | 
						|
        }
 | 
						|
#print "strip_leading_dir F after  $F\n";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- strip_leading_dir()\n" if $opt_v > 2;
 | 
						|
    return @filenames;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub find_deepest_file {                      # {{{1
 | 
						|
    my @filenames = @_;
 | 
						|
    #
 | 
						|
    #  Input should be a list of file names
 | 
						|
    #  with the same leading directory such as
 | 
						|
    #
 | 
						|
    #      dir1/dir2/a.txt
 | 
						|
    #      dir1/dir2/b.txt
 | 
						|
    #      dir1/dir2/dir3/c.txt
 | 
						|
    #
 | 
						|
    #  Output is the file with the most parent directories:
 | 
						|
    #
 | 
						|
    #      dir1/dir2/dir3/c.txt
 | 
						|
 | 
						|
    print "-> find_deepest_file()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $deepest    = undef;
 | 
						|
    my $max_subdir = -1;
 | 
						|
    foreach my $F (sort @filenames) {
 | 
						|
        my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
 | 
						|
        my @x = File::Spec->splitdir( $Dir );
 | 
						|
        pop @x unless $x[$#x]; # last entry usually null, remove it
 | 
						|
        if (scalar @x > $max_subdir) {
 | 
						|
            $deepest    = $F;
 | 
						|
            $max_subdir = scalar @x;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- find_deepest_file()\n" if $opt_v > 2;
 | 
						|
    return $deepest;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub find_uncommon_parent_dir {               # {{{1
 | 
						|
    my ($file_L, $file_R) = @_;
 | 
						|
    #
 | 
						|
    # example:
 | 
						|
    #
 | 
						|
    #   file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
 | 
						|
    #   file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
 | 
						|
    #
 | 
						|
    # then return
 | 
						|
    #
 | 
						|
    #   "perl-5.16.1",
 | 
						|
    #   "/tmp/8VxQG0OLbp/perl-5.16.3",
 | 
						|
 | 
						|
    my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
 | 
						|
    my @x_L = File::Spec->splitdir( $Dir_L );
 | 
						|
    my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
 | 
						|
    my @x_R = File::Spec->splitdir( $Dir_R );
 | 
						|
 | 
						|
    my @common  = ();
 | 
						|
 | 
						|
    # work backwards
 | 
						|
    while ($x_L[$#x_L] eq $x_R[$#x_R]) {
 | 
						|
        push @common, $x_L[$#x_L];
 | 
						|
        pop  @x_L;
 | 
						|
        pop  @x_R;
 | 
						|
    }
 | 
						|
    my $success = scalar @common;
 | 
						|
 | 
						|
    my $dirs_L = File::Spec->catdir( @x_L );
 | 
						|
    my $dirs_R = File::Spec->catdir( @x_R );
 | 
						|
    my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
 | 
						|
    my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
 | 
						|
 | 
						|
    return $lead_L, $lead_R, $success;
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub get_leading_dirs {                       # {{{1
 | 
						|
    my ($rh_file_list_L, $rh_file_list_R) = @_;
 | 
						|
    # find uniquely named files in both sets to help determine the
 | 
						|
    # leading directory positions
 | 
						|
    my %unique_filename = ();
 | 
						|
    my %basename_L = ();
 | 
						|
    my %basename_R = ();
 | 
						|
    foreach my $f (keys %{$rh_file_list_L}) {
 | 
						|
        my $bn = basename($f);
 | 
						|
        $basename_L{ $bn }{'count'}   += 1;
 | 
						|
        $basename_L{ $bn }{'fullpath'} = $f;
 | 
						|
    }
 | 
						|
    foreach my $f (keys %{$rh_file_list_R}) {
 | 
						|
        my $bn = basename($f);
 | 
						|
        $basename_R{ $bn }{'count'}   += 1;
 | 
						|
        $basename_R{ $bn }{'fullpath'} = $f;
 | 
						|
    }
 | 
						|
    foreach my $f (keys %basename_L) {
 | 
						|
        next unless $basename_L{$f}{'count'} == 1;
 | 
						|
        next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
 | 
						|
        $unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
 | 
						|
        $unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
 | 
						|
    }
 | 
						|
    return undef, undef, 0 unless %unique_filename;
 | 
						|
 | 
						|
####my %candidate_leading_dir_L = ();
 | 
						|
####my %candidate_leading_dir_R = ();
 | 
						|
    my ($L_drop, $R_drop) = (undef, undef);
 | 
						|
    foreach my $f (keys %unique_filename) {
 | 
						|
        my $fL = $unique_filename{ $f }{'L'};
 | 
						|
        my $fR = $unique_filename{ $f }{'R'};
 | 
						|
 | 
						|
        my @DL = File::Spec->splitdir($fL);
 | 
						|
        my @DR = File::Spec->splitdir($fR);
 | 
						|
#printf "%-36s -> %-36s\n", $fL, $fR;
 | 
						|
#print Dumper(@DL, @DR);
 | 
						|
        # find the most number of common directories between L and R
 | 
						|
        if (!defined $L_drop) {
 | 
						|
            $L_drop = dirname $fL;
 | 
						|
        }
 | 
						|
        if (!defined $R_drop) {
 | 
						|
            $R_drop = dirname $fR;
 | 
						|
        }
 | 
						|
        my $n_path_elements_L = scalar @DL;
 | 
						|
        my $n_path_elements_R = scalar @DR;
 | 
						|
        my $n_path_elem = $n_path_elements_L < $n_path_elements_R ?
 | 
						|
                          $n_path_elements_L : $n_path_elements_R;
 | 
						|
        my ($n_L_drop_this_pair, $n_R_drop_this_pair) = (0, 0);
 | 
						|
        for (my $i = 0; $i < $n_path_elem; $i++) {
 | 
						|
            last if $DL[ $#DL - $i] ne $DR[ $#DR - $i];
 | 
						|
            ++$n_L_drop_this_pair;
 | 
						|
            ++$n_R_drop_this_pair;
 | 
						|
        }
 | 
						|
        my $L_common = File::Spec->catdir( @DL[0..($#DL-$n_L_drop_this_pair)] );
 | 
						|
        my $R_common = File::Spec->catdir( @DR[0..($#DR-$n_R_drop_this_pair)] );
 | 
						|
#print "L_common=$L_common\n";
 | 
						|
#print "R_common=$R_common\n";
 | 
						|
        $L_drop = $L_common if length $L_common < length $L_drop;
 | 
						|
        $R_drop = $R_common if length $R_common < length $R_drop;
 | 
						|
 | 
						|
        $L_drop = $L_drop . "/" if $L_drop;
 | 
						|
        $R_drop = $R_drop . "/" if $R_drop;
 | 
						|
########my $ptr_L = length($fL) - 1;
 | 
						|
########my $ptr_R = length($fR) - 1;
 | 
						|
########my @aL    = split '', $fL;
 | 
						|
########my @aR    = split '', $fR;
 | 
						|
########while ($ptr_L >= 0 and $ptr_R >= 0) {
 | 
						|
########    last if $aL[$ptr_L] ne $aR[$ptr_R];
 | 
						|
########    --$ptr_L;
 | 
						|
########    --$ptr_R;
 | 
						|
########}
 | 
						|
########my $leading_dir_L = "";
 | 
						|
########   $leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
 | 
						|
########my $leading_dir_R = "";
 | 
						|
########   $leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
 | 
						|
########++$candidate_leading_dir_L{$leading_dir_L};
 | 
						|
########++$candidate_leading_dir_R{$leading_dir_R};
 | 
						|
    }
 | 
						|
#use Data::Dumper::Simple;
 | 
						|
    # at this point path separator on Windows is already /
 | 
						|
 | 
						|
    while ($L_drop =~ m{//}) {
 | 
						|
        $L_drop =~ s{//}{/}g;
 | 
						|
    }
 | 
						|
    while ($R_drop =~ m{//}) {
 | 
						|
        $R_drop =~ s{//}{/}g;
 | 
						|
    }
 | 
						|
 | 
						|
#print "L_drop=$L_drop\n";
 | 
						|
#print "R_drop=$R_drop\n";
 | 
						|
    return $L_drop, $R_drop, 1;
 | 
						|
####my $best_L = (sort {
 | 
						|
####           $candidate_leading_dir_L{$b} <=>
 | 
						|
####           $candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
 | 
						|
####my $best_R = (sort {
 | 
						|
####           $candidate_leading_dir_R{$b} <=>
 | 
						|
####           $candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
 | 
						|
####return $best_L, $best_R, 1;
 | 
						|
} # 1}}}
 | 
						|
sub align_by_pairs {                         # {{{1
 | 
						|
    my ($rh_file_list_L        , # in
 | 
						|
        $rh_file_list_R        , # in
 | 
						|
        $ra_added              , # out
 | 
						|
        $ra_removed            , # out
 | 
						|
        $ra_compare_list       , # out
 | 
						|
        ) = @_;
 | 
						|
    print "-> align_by_pairs()\n" if $opt_v > 2;
 | 
						|
    @{$ra_compare_list} = ();
 | 
						|
 | 
						|
    my @files_L = sort keys %{$rh_file_list_L};
 | 
						|
    my @files_R = sort keys %{$rh_file_list_R};
 | 
						|
    return () unless @files_L or  @files_R;  # at least one must have stuff
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
        foreach (@files_L) { $_ =~ s{\\}{/}g; }
 | 
						|
        foreach (@files_R) { $_ =~ s{\\}{/}g; }
 | 
						|
        if ($opt_ignore_case_ext) {
 | 
						|
            foreach (@files_L) { $_ = lc $_; }
 | 
						|
            foreach (@files_R) { $_ = lc $_; }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if      ( @files_L and !@files_R) {
 | 
						|
        # left side has stuff, right side is empty; everything deleted
 | 
						|
        @{$ra_added   }     = ();
 | 
						|
        @{$ra_removed }     = @files_L;
 | 
						|
        @{$ra_compare_list} = ();
 | 
						|
        return;
 | 
						|
    } elsif (!@files_L and  @files_R) {
 | 
						|
        # left side is empty, right side has stuff; everything added
 | 
						|
        @{$ra_added   }     = @files_R;
 | 
						|
        @{$ra_removed }     = ();
 | 
						|
        @{$ra_compare_list} = ();
 | 
						|
        return;
 | 
						|
    } elsif (scalar(@files_L) == 1 and scalar(@files_R) == 1) {
 | 
						|
        # Special case of comparing one file against another.  In
 | 
						|
        # this case force the pair to be aligned with each other,
 | 
						|
        # otherwise the file naming logic will think one file
 | 
						|
        # was added and the other deleted.
 | 
						|
        @{$ra_added   }     = ();
 | 
						|
        @{$ra_removed }     = ();
 | 
						|
        @{$ra_compare_list} = ( [$files_L[0], $files_R[0]] );
 | 
						|
        return;
 | 
						|
    }
 | 
						|
#use Data::Dumper::Simple;
 | 
						|
#print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
 | 
						|
#die;
 | 
						|
 | 
						|
    # The harder case:  compare groups of files.  This only works
 | 
						|
    # if the groups are in different directories so the first step
 | 
						|
    # is to strip the leading directory names from file lists to
 | 
						|
    # make it possible to align by file names.
 | 
						|
    my @files_L_minus_dir = undef;
 | 
						|
    my @files_R_minus_dir = undef;
 | 
						|
 | 
						|
    my $deepest_file_L    = find_deepest_file(@files_L);
 | 
						|
    my $deepest_file_R    = find_deepest_file(@files_R);
 | 
						|
#print "deepest L = [$deepest_file_L]\n";
 | 
						|
#print "deepest R = [$deepest_file_R]\n";
 | 
						|
    my ($leading_dir_L, $leading_dir_R, $success) =
 | 
						|
                get_leading_dirs($rh_file_list_L, $rh_file_list_R);
 | 
						|
#print "leading_dir_L=[$leading_dir_L]\n";
 | 
						|
#print "leading_dir_R=[$leading_dir_R]\n";
 | 
						|
#print "success      =[$success]\n";
 | 
						|
    if ($success) {
 | 
						|
        @files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
 | 
						|
        @files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
 | 
						|
    } else {
 | 
						|
        # otherwise fall back to old strategy
 | 
						|
        @files_L_minus_dir = remove_leading_dir(@files_L);
 | 
						|
        @files_R_minus_dir = remove_leading_dir(@files_R);
 | 
						|
    }
 | 
						|
 | 
						|
    # Keys of the stripped_X arrays are canonical file names;
 | 
						|
    # should overlap mostly.  Keys in stripped_L but not in
 | 
						|
    # stripped_R are files that have been deleted.  Keys in
 | 
						|
    # stripped_R but not in stripped_L have been added.
 | 
						|
    my %stripped_L = ();
 | 
						|
       @stripped_L{ @files_L_minus_dir } = @files_L;
 | 
						|
    my %stripped_R = ();
 | 
						|
       @stripped_R{ @files_R_minus_dir } = @files_R;
 | 
						|
 | 
						|
    my %common = ();
 | 
						|
    foreach my $f (keys %stripped_L) {
 | 
						|
        $common{$f}  = 1 if     defined $stripped_R{$f};
 | 
						|
    }
 | 
						|
 | 
						|
    my %deleted = ();
 | 
						|
    foreach my $f (keys %stripped_L) {
 | 
						|
        $deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
 | 
						|
    }
 | 
						|
 | 
						|
    my %added = ();
 | 
						|
    foreach my $f (keys %stripped_R) {
 | 
						|
        $added{$stripped_R{$f}}   = $f unless defined $stripped_L{$f};
 | 
						|
    }
 | 
						|
 | 
						|
#use Data::Dumper::Simple;
 | 
						|
#print Dumper("align_by_pairs", %stripped_L, %stripped_R);
 | 
						|
#print Dumper("align_by_pairs", %common, %added, %deleted);
 | 
						|
 | 
						|
    foreach my $f (sort keys %common) {
 | 
						|
        push @{$ra_compare_list}, [ $stripped_L{$f},
 | 
						|
                                    $stripped_R{$f} ];
 | 
						|
    }
 | 
						|
    @{$ra_added   } = keys %added  ;
 | 
						|
    @{$ra_removed } = keys %deleted;
 | 
						|
 | 
						|
    print "<- align_by_pairs()\n" if $opt_v > 2;
 | 
						|
    return;
 | 
						|
#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
 | 
						|
#die;
 | 
						|
} # 1}}}
 | 
						|
sub html_header {                            # {{{1
 | 
						|
    my ($title , ) = @_;
 | 
						|
 | 
						|
    print "-> html_header\n" if $opt_v > 2;
 | 
						|
    return
 | 
						|
'<html>
 | 
						|
<head>
 | 
						|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 | 
						|
<meta name="GENERATOR" content="cloc http://github.com/AlDanial/cloc">
 | 
						|
' .
 | 
						|
"
 | 
						|
<!-- Created by $script v$VERSION -->
 | 
						|
<title>$title</title>
 | 
						|
" .
 | 
						|
'
 | 
						|
<style TYPE="text/css">
 | 
						|
<!--
 | 
						|
    body {
 | 
						|
        color: black;
 | 
						|
        background-color: white;
 | 
						|
        font-family: monospace
 | 
						|
    }
 | 
						|
 | 
						|
    .whitespace {
 | 
						|
        background-color: gray;
 | 
						|
    }
 | 
						|
 | 
						|
    .comment {
 | 
						|
        color: gray;
 | 
						|
        font-style: italic;
 | 
						|
    }
 | 
						|
 | 
						|
    .clinenum {
 | 
						|
        color: red;
 | 
						|
    }
 | 
						|
 | 
						|
    .linenum {
 | 
						|
        color: green;
 | 
						|
    }
 | 
						|
 -->
 | 
						|
</style>
 | 
						|
</head>
 | 
						|
<body>
 | 
						|
<pre><tt>
 | 
						|
';
 | 
						|
    print "<- html_header\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub html_end {                               # {{{1
 | 
						|
return
 | 
						|
'</tt></pre>
 | 
						|
</body>
 | 
						|
</html>
 | 
						|
';
 | 
						|
} # 1}}}
 | 
						|
sub die_unknown_lang {                       # {{{1
 | 
						|
    my ($lang, $option_name) = @_;
 | 
						|
    die "Unknown language '$lang' used with $option_name option.  " .
 | 
						|
        "The command\n  $script --show-lang\n" .
 | 
						|
        "will print all recognized languages.  Language names are " .
 | 
						|
        "case sensitive.\n" ;
 | 
						|
} # 1}}}
 | 
						|
sub unicode_file {                           # {{{1
 | 
						|
    my $file = shift @_;
 | 
						|
 | 
						|
    print "-> unicode_file($file)\n" if $opt_v > 2;
 | 
						|
    return 0 if (get_size($file) > 2_000_000);
 | 
						|
    # don't bother trying to test binary files bigger than 2 MB
 | 
						|
 | 
						|
    my $IN = open_file('<', $file, 1);
 | 
						|
    if (!defined $IN) {
 | 
						|
        warn "Unable to read $file; ignoring.\n";
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
    my @lines = <$IN>;
 | 
						|
    $IN->close;
 | 
						|
 | 
						|
    if (unicode_to_ascii( join('', @lines) )) {
 | 
						|
        print "<- unicode_file()\n" if $opt_v > 2;
 | 
						|
        return 1;
 | 
						|
    } else {
 | 
						|
        print "<- unicode_file()\n" if $opt_v > 2;
 | 
						|
        return 0;
 | 
						|
    }
 | 
						|
 | 
						|
} # 1}}}
 | 
						|
sub unicode_to_ascii {                       # {{{1
 | 
						|
    my $string = shift @_;
 | 
						|
 | 
						|
    # A trivial attempt to convert UTF-16 little or big endian
 | 
						|
    # files into ASCII.  These files exhibit the following byte
 | 
						|
    # sequence:
 | 
						|
    #   byte   1:  255
 | 
						|
    #   byte   2:  254
 | 
						|
    #   byte   3:  ord of ASCII character
 | 
						|
    #   byte   4:    0
 | 
						|
    #   byte 3+i:  ord of ASCII character
 | 
						|
    #   byte 4+i:    0
 | 
						|
    # or
 | 
						|
    #   byte   1:  255
 | 
						|
    #   byte   2:  254
 | 
						|
    #   byte   3:    0
 | 
						|
    #   byte   4:  ord of ASCII character
 | 
						|
    #   byte 3+i:    0
 | 
						|
    #   byte 4+i:  ord of ASCII character
 | 
						|
    #
 | 
						|
    print "-> unicode_to_ascii()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $length  = length $string;
 | 
						|
#print "length=$length\n";
 | 
						|
    return '' if $length <= 3;
 | 
						|
    my @unicode = split(//, $string);
 | 
						|
 | 
						|
    # check the first 100 characters (= 200 bytes) for big or
 | 
						|
    # little endian UTF-16 encoding
 | 
						|
    my $max_peek     = $length < 200 ? $length : 200;
 | 
						|
    my $max_for_pass = $length < 200 ? 0.9*$max_peek/2 : 90;
 | 
						|
    my @view_1   = ();
 | 
						|
    for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
 | 
						|
    my @view_2   = ();
 | 
						|
    for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
 | 
						|
 | 
						|
    my $points_1 = 0;
 | 
						|
    foreach my $C (@view_1) {
 | 
						|
        ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
 | 
						|
                                                          or ord($C) == 10
 | 
						|
                                                          or ord($C) ==  9;
 | 
						|
    }
 | 
						|
 | 
						|
    my $points_2 = 0;
 | 
						|
    foreach my $C (@view_2) {
 | 
						|
        ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
 | 
						|
                                                          or ord($C) == 10
 | 
						|
                                                          or ord($C) ==  9;
 | 
						|
    }
 | 
						|
#print "points 1: $points_1\n";
 | 
						|
#print "points 2: $points_2\n";
 | 
						|
#print "max_peek    : $max_peek\n";
 | 
						|
#print "max_for_pass: $max_for_pass\n";
 | 
						|
 | 
						|
    my $offset = undef;
 | 
						|
    if    ($points_1 > $max_for_pass) { $offset = 2; }
 | 
						|
    elsif ($points_2 > $max_for_pass) { $offset = 3; }
 | 
						|
    else                   {
 | 
						|
        print "<- unicode_to_ascii() a p1=$points_1 p2=$points_2\n" if $opt_v > 2;
 | 
						|
        return '';
 | 
						|
    }  # neither big or little endian UTF-16
 | 
						|
 | 
						|
    my @ascii              = ();
 | 
						|
    for (my $i = $offset; $i < $length; $i += 2) {
 | 
						|
        # some compound characters are made of HT (9), LF (10), or CR (13)
 | 
						|
        # True HT, LF, CR are followed by 00; only add those.
 | 
						|
        my $L = $unicode[$i];
 | 
						|
        if (ord($L) == 9 or ord($L) == 10 or ord($L) == 13) {
 | 
						|
            my $companion;
 | 
						|
            if ($points_1) {
 | 
						|
                last if $i+1 >= $length;
 | 
						|
                $companion = $unicode[$i+1];
 | 
						|
            } else {
 | 
						|
                $companion = $unicode[$i-1];
 | 
						|
            }
 | 
						|
            if (ord($companion) == 0) {
 | 
						|
                push @ascii, $L;
 | 
						|
            } else {
 | 
						|
                push @ascii, " ";  # no clue what this letter is
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            push @ascii, $L;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- unicode_to_ascii() b p1=$points_1 p2=$points_2\n" if $opt_v > 2;
 | 
						|
    return join("", @ascii);
 | 
						|
} # 1}}}
 | 
						|
sub uncompress_archive_cmd {                 # {{{1
 | 
						|
    my ($archive_file, ) = @_;
 | 
						|
 | 
						|
    # Wrap $archive_file in single or double quotes in the system
 | 
						|
    # commands below to avoid filename chicanery (including
 | 
						|
    # spaces in the names).
 | 
						|
 | 
						|
    print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
 | 
						|
    my $extract_cmd = "";
 | 
						|
    my $missing     = "";
 | 
						|
    if ($opt_extract_with) {
 | 
						|
        ( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
 | 
						|
    } elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
 | 
						|
        $extract_cmd = "cat > -";
 | 
						|
    } elsif ($archive_file =~ /\.tar$/ and $ON_WINDOWS) {
 | 
						|
        $extract_cmd = "tar -xf \"$archive_file\"";
 | 
						|
    } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or
 | 
						|
              $archive_file =~ /\.tgz$/       ) and !$ON_WINDOWS)    {
 | 
						|
        if (external_utility_exists("gzip --version")) {
 | 
						|
            if (external_utility_exists("tar --version")) {
 | 
						|
                $extract_cmd = "gzip -dc '$archive_file' | tar xf -";
 | 
						|
            } else {
 | 
						|
                $missing = "tar";
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $missing = "gzip";
 | 
						|
        }
 | 
						|
    } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS)    {
 | 
						|
        if (external_utility_exists("bzip2 --help")) {
 | 
						|
            if (external_utility_exists("tar --version")) {
 | 
						|
                $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
 | 
						|
            } else {
 | 
						|
                $missing = "tar";
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $missing = "bzip2";
 | 
						|
        }
 | 
						|
    } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS)    {
 | 
						|
        if (external_utility_exists("unxz --version")) {
 | 
						|
            if (external_utility_exists("tar --version")) {
 | 
						|
                $extract_cmd = "unxz -dc '$archive_file' | tar xf -";
 | 
						|
            } else {
 | 
						|
                $missing = "tar";
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $missing = "bzip2";
 | 
						|
        }
 | 
						|
    } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS)    {
 | 
						|
        $extract_cmd = "tar xf '$archive_file'";
 | 
						|
    } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
 | 
						|
        if (external_utility_exists("cpio --version")) {
 | 
						|
            if (external_utility_exists("rpm2cpio")) {
 | 
						|
                $extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
 | 
						|
            } else {
 | 
						|
                $missing = "rpm2cpio";
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $missing = "bzip2";
 | 
						|
        }
 | 
						|
    } elsif ($archive_file =~ /\.(whl|zip)$/i and !$ON_WINDOWS)    {
 | 
						|
        if (external_utility_exists("unzip")) {
 | 
						|
            $extract_cmd = "unzip -qq -d . '$archive_file'";
 | 
						|
        } else {
 | 
						|
            $missing = "unzip";
 | 
						|
        }
 | 
						|
    } elsif ($archive_file =~ /\.deb$/i and !$ON_WINDOWS)    {
 | 
						|
        # only useful if the .deb contains source code--most
 | 
						|
        # .deb files just have compiled executables
 | 
						|
        if (external_utility_exists("dpkg-deb")) {
 | 
						|
            $extract_cmd = "dpkg-deb -x '$archive_file' .";
 | 
						|
        } else {
 | 
						|
            $missing = "dpkg-deb";
 | 
						|
        }
 | 
						|
    } elsif ($ON_WINDOWS and $archive_file =~ /\.(whl|zip)$/i) {
 | 
						|
        # use unzip on Windows (comes with git-for-Windows)
 | 
						|
        if (external_utility_exists("unzip")) {
 | 
						|
             $extract_cmd = "unzip -qq -d . \"$archive_file\" ";
 | 
						|
        } else {
 | 
						|
            $missing = "unzip";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- uncompress_archive_cmd\n" if $opt_v > 2;
 | 
						|
    if ($missing) {
 | 
						|
        die "Unable to expand $archive_file because external\n",
 | 
						|
            "utility '$missing' is not available.\n",
 | 
						|
            "Another possibility is to use the --extract-with option.\n";
 | 
						|
    } else {
 | 
						|
        return $extract_cmd;
 | 
						|
    }
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub read_list_file {                         # {{{1
 | 
						|
    my ($file, ) = @_;
 | 
						|
    # reads filenames from a STDIN pipe if $file == "-"
 | 
						|
 | 
						|
    print "-> read_list_file($file)\n" if $opt_v > 2;
 | 
						|
    my @entry = ();
 | 
						|
 | 
						|
    if ($file eq "-") {
 | 
						|
        # read from a STDIN pipe
 | 
						|
        my $IN;
 | 
						|
        open($IN, $file);
 | 
						|
        if (!defined $IN) {
 | 
						|
            warn "Unable to read $file; ignoring.\n";
 | 
						|
            return ();
 | 
						|
        }
 | 
						|
        while (<$IN>) {
 | 
						|
            next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
 | 
						|
            s/\cM$//;  # DOS to Unix
 | 
						|
            chomp;
 | 
						|
            push @entry, $_;
 | 
						|
        }
 | 
						|
        $IN->close;
 | 
						|
    } else {
 | 
						|
        # read from an actual file
 | 
						|
        foreach my $line (read_file($file)) {
 | 
						|
            next if $line =~ /^\s*$/ or $line =~ /^\s*#/;
 | 
						|
            $line =~ s/\cM$//;  # DOS to Unix
 | 
						|
            chomp $line;
 | 
						|
            push @entry, $line;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- read_list_file\n" if $opt_v > 2;
 | 
						|
    return @entry;
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub external_utility_exists {                # {{{1
 | 
						|
    my $exe = shift @_;
 | 
						|
 | 
						|
    my $success      = 0;
 | 
						|
    if ($ON_WINDOWS) {
 | 
						|
        $success = 1 unless system $exe . ' > nul';
 | 
						|
    } else {
 | 
						|
        $success = 1 unless system $exe . ' >/dev/null 2>&1';
 | 
						|
        if (!$success) {
 | 
						|
            $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return $success;
 | 
						|
} # 1}}}
 | 
						|
sub write_xsl_file {                         # {{{1
 | 
						|
    print "-> write_xsl_file\n" if $opt_v > 2;
 | 
						|
    my $XSL =             # <style>  </style> {{{2
 | 
						|
'<?xml version="1.0" encoding="UTF-8"?>
 | 
						|
<!-- XSL file by Paul Schwann, January 2009.
 | 
						|
     Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
 | 
						|
     -->
 | 
						|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 | 
						|
  <xsl:output method="html"/>
 | 
						|
  <xsl:template match="/">
 | 
						|
    <html xmlns="http://www.w3.org/1999/xhtml">
 | 
						|
      <head>
 | 
						|
        <title>CLOC Results</title>
 | 
						|
      </head>
 | 
						|
      <style type="text/css">
 | 
						|
        table {
 | 
						|
          table-layout: auto;
 | 
						|
          border-collapse: collapse;
 | 
						|
          empty-cells: show;
 | 
						|
        }
 | 
						|
        td, th {
 | 
						|
          padding: 4px;
 | 
						|
        }
 | 
						|
        th {
 | 
						|
          background-color: #CCCCCC;
 | 
						|
        }
 | 
						|
        td {
 | 
						|
          text-align: center;
 | 
						|
        }
 | 
						|
        table, td, tr, th {
 | 
						|
          border: thin solid #999999;
 | 
						|
        }
 | 
						|
      </style>
 | 
						|
      <body>
 | 
						|
        <h3><xsl:value-of select="results/header"/></h3>
 | 
						|
';
 | 
						|
# 2}}}
 | 
						|
 | 
						|
    if ($opt_by_file) {
 | 
						|
        $XSL .=             # <table> </table>{{{2
 | 
						|
'        <table>
 | 
						|
          <thead>
 | 
						|
            <tr>
 | 
						|
              <th>File</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
              <th>Language</th>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <th>3<sup>rd</sup> Generation Equivalent</th>
 | 
						|
              <th>Scale</th>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'           </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="results/files/file">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
              <td><xsl:value-of select="@language"/></td>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <td><xsl:value-of select="@factor"/></td>
 | 
						|
              <td><xsl:value-of select="@scaled"/></td>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'           </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
            <tr>
 | 
						|
              <th>Total</th>
 | 
						|
              <th><xsl:value-of select="results/files/total/@blank"/></th>
 | 
						|
              <th><xsl:value-of select="results/files/total/@comment"/></th>
 | 
						|
              <th><xsl:value-of select="results/files/total/@code"/></th>
 | 
						|
              <th><xsl:value-of select="results/files/total/@language"/></th>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <th><xsl:value-of select="results/files/total/@factor"/></th>
 | 
						|
              <th><xsl:value-of select="results/files/total/@scaled"/></th>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'           </tr>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
        <br/>
 | 
						|
';
 | 
						|
# 2}}}
 | 
						|
    }
 | 
						|
 | 
						|
    if (!$opt_by_file or $opt_by_file_by_lang) {
 | 
						|
        $XSL .=             # <table> </table> {{{2
 | 
						|
'       <table>
 | 
						|
          <thead>
 | 
						|
            <tr>
 | 
						|
              <th>Language</th>
 | 
						|
              <th>Files</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <th>Scale</th>
 | 
						|
              <th>3<sup>rd</sup> Generation Equivalent</th>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'           </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="results/languages/language">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@files_count"/></td>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <td><xsl:value-of select="@factor"/></td>
 | 
						|
              <td><xsl:value-of select="@scaled"/></td>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'          </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
            <tr>
 | 
						|
              <th>Total</th>
 | 
						|
              <th><xsl:value-of select="results/languages/total/@sum_files"/></th>
 | 
						|
              <th><xsl:value-of select="results/languages/total/@blank"/></th>
 | 
						|
              <th><xsl:value-of select="results/languages/total/@comment"/></th>
 | 
						|
              <th><xsl:value-of select="results/languages/total/@code"/></th>
 | 
						|
';
 | 
						|
        $XSL .=
 | 
						|
'             <th><xsl:value-of select="results/languages/total/@factor"/></th>
 | 
						|
              <th><xsl:value-of select="results/languages/total/@scaled"/></th>
 | 
						|
' if $opt_3;
 | 
						|
        $XSL .=
 | 
						|
'           </tr>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
';
 | 
						|
# 2}}}
 | 
						|
    }
 | 
						|
 | 
						|
    $XSL.= <<'EO_XSL'; # {{{2
 | 
						|
      </body>
 | 
						|
    </html>
 | 
						|
  </xsl:template>
 | 
						|
</xsl:stylesheet>
 | 
						|
 | 
						|
EO_XSL
 | 
						|
# 2}}}
 | 
						|
 | 
						|
    my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
 | 
						|
<?xml version="1.0" encoding="UTF-8"?>
 | 
						|
<!-- XSL file by Blazej Kroll, November 2010 -->
 | 
						|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 | 
						|
  <xsl:output method="html"/>
 | 
						|
  <xsl:template match="/">
 | 
						|
    <html xmlns="http://www.w3.org/1999/xhtml">
 | 
						|
      <head>
 | 
						|
        <title>CLOC Results</title>
 | 
						|
      </head>
 | 
						|
      <style type="text/css">
 | 
						|
        table {
 | 
						|
          table-layout: auto;
 | 
						|
          border-collapse: collapse;
 | 
						|
          empty-cells: show;
 | 
						|
          margin: 1em;
 | 
						|
        }
 | 
						|
        td, th {
 | 
						|
          padding: 4px;
 | 
						|
        }
 | 
						|
        th {
 | 
						|
          background-color: #CCCCCC;
 | 
						|
        }
 | 
						|
        td {
 | 
						|
          text-align: center;
 | 
						|
        }
 | 
						|
        table, td, tr, th {
 | 
						|
          border: thin solid #999999;
 | 
						|
        }
 | 
						|
      </style>
 | 
						|
      <body>
 | 
						|
        <h3><xsl:value-of select="results/header"/></h3>
 | 
						|
EO_DIFF_XSL
 | 
						|
# 2}}}
 | 
						|
 | 
						|
    if ($opt_by_file) {
 | 
						|
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="4">Same</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>File</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/same/file">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="4">Modified</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>File</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/modified/file">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="4">Added</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>File</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/added/file">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="4">Removed</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>File</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/removed/file">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
EO_DIFF_XSL
 | 
						|
# 2}}}
 | 
						|
    }
 | 
						|
 | 
						|
    if (!$opt_by_file or $opt_by_file_by_lang) {
 | 
						|
        $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="5">Same</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>Language</th>
 | 
						|
              <th>Files</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/same/language">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@files_count"/></td>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="5">Modified</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>Language</th>
 | 
						|
              <th>Files</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/modified/language">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@files_count"/></td>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="5">Added</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>Language</th>
 | 
						|
              <th>Files</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/added/language">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@files_count"/></td>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
 | 
						|
        <table>
 | 
						|
          <thead>
 | 
						|
          <tr><th colspan="5">Removed</th>
 | 
						|
          </tr>
 | 
						|
            <tr>
 | 
						|
              <th>Language</th>
 | 
						|
              <th>Files</th>
 | 
						|
              <th>Blank</th>
 | 
						|
              <th>Comment</th>
 | 
						|
              <th>Code</th>
 | 
						|
            </tr>
 | 
						|
          </thead>
 | 
						|
          <tbody>
 | 
						|
          <xsl:for-each select="diff_results/removed/language">
 | 
						|
            <tr>
 | 
						|
              <th><xsl:value-of select="@name"/></th>
 | 
						|
              <td><xsl:value-of select="@files_count"/></td>
 | 
						|
              <td><xsl:value-of select="@blank"/></td>
 | 
						|
              <td><xsl:value-of select="@comment"/></td>
 | 
						|
              <td><xsl:value-of select="@code"/></td>
 | 
						|
            </tr>
 | 
						|
          </xsl:for-each>
 | 
						|
          </tbody>
 | 
						|
        </table>
 | 
						|
EO_DIFF_XSL
 | 
						|
# 2}}}
 | 
						|
 | 
						|
    }
 | 
						|
 | 
						|
    $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | 
						|
      </body>
 | 
						|
    </html>
 | 
						|
  </xsl:template>
 | 
						|
</xsl:stylesheet>
 | 
						|
EO_DIFF_XSL
 | 
						|
# 2}}}
 | 
						|
    if ($opt_diff) {
 | 
						|
        write_file($CLOC_XSL, {}, ( $XSL_DIFF ) );
 | 
						|
    } else {
 | 
						|
        write_file($CLOC_XSL, {}, ( $XSL ) );
 | 
						|
    }
 | 
						|
    print "<- write_xsl_file\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
sub normalize_file_names {                   # {{{1
 | 
						|
    print "-> normalize_file_names\n" if $opt_v > 2;
 | 
						|
    my (@files, ) = @_;
 | 
						|
 | 
						|
    # Returns a hash of file names reduced to a canonical form
 | 
						|
    # (fully qualified file names, all path separators changed to /,
 | 
						|
    # Windows file names lowercased).  Hash values are the original
 | 
						|
    # file name.
 | 
						|
 | 
						|
    my %normalized = ();
 | 
						|
    foreach my $F (@files) {
 | 
						|
        my $F_norm = $F;
 | 
						|
        if ($ON_WINDOWS) {
 | 
						|
            $F_norm = lc $F_norm; # for case insensitive file name comparisons
 | 
						|
            $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
 | 
						|
            $F_norm =~ s{^\./}{}g;  # remove leading ./
 | 
						|
            if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
 | 
						|
                # looks like a relative path; prefix with cwd
 | 
						|
                $F_norm = lc "$cwd/$F_norm";
 | 
						|
            }
 | 
						|
        } else {
 | 
						|
            $F_norm =~ s{^\./}{}g;  # remove leading ./
 | 
						|
            if ($F_norm !~ m{^/}) {
 | 
						|
                # looks like a relative path; prefix with cwd
 | 
						|
                $F_norm = "$cwd/$F_norm";
 | 
						|
            }
 | 
						|
        }
 | 
						|
        # Remove trailing / so it does not interfere with further regex code
 | 
						|
        # that does not expect it
 | 
						|
        $F_norm =~ s{/+$}{};
 | 
						|
        $normalized{ $F_norm } = $F;
 | 
						|
    }
 | 
						|
    print "<- normalize_file_names\n" if $opt_v > 2;
 | 
						|
    return %normalized;
 | 
						|
} # 1}}}
 | 
						|
sub combine_diffs {                          # {{{1
 | 
						|
    # subroutine by Andy (awalshe@sf.net)
 | 
						|
    # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
 | 
						|
    my ($ra_files) = @_;
 | 
						|
    print "-> combine_diffs\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my $res   = "$URL v $VERSION\n";
 | 
						|
    my $dl    = '-';
 | 
						|
    my $width = 79;
 | 
						|
    # columns are in this order
 | 
						|
    my @cols  = ('files', 'blank', 'comment', 'code');
 | 
						|
    my %HoH   = ();
 | 
						|
 | 
						|
    foreach my $file (@{$ra_files}) {
 | 
						|
        my $IN = open_file('<', $file, 1);
 | 
						|
        if (!defined $IN) {
 | 
						|
            warn "Unable to read $file; ignoring.\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $sec;
 | 
						|
        while (<$IN>) {
 | 
						|
            chomp;
 | 
						|
            s/\cM$//;
 | 
						|
            next if /^(http|Language|-----)/;
 | 
						|
            if (/^[A-Za-z0-9]+/) {        # section title
 | 
						|
                $sec = $_;
 | 
						|
                chomp($sec);
 | 
						|
                $HoH{$sec} = () if ! exists $HoH{$sec};
 | 
						|
                next;
 | 
						|
            }
 | 
						|
 | 
						|
            if (/^\s(same|modified|added|removed)/) {  # calculated totals row
 | 
						|
                my @ar = grep { $_ ne '' } split(/ /, $_);
 | 
						|
                chomp(@ar);
 | 
						|
                my $ttl = shift @ar;
 | 
						|
                my $i = 0;
 | 
						|
                foreach(@ar) {
 | 
						|
                    my $t = "${ttl}${dl}${cols[$i]}";
 | 
						|
                    $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
 | 
						|
                    $HoH{$sec}{$t} += $_;
 | 
						|
                    $i++;
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $IN->close;
 | 
						|
    }
 | 
						|
 | 
						|
    # rows are in this order
 | 
						|
    my @rows = ('same', 'modified', 'added', 'removed');
 | 
						|
 | 
						|
    $res .= sprintf("%s\n", "-" x $width);
 | 
						|
    $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language',
 | 
						|
                    $cols[0], $cols[1], $cols[2], $cols[3]);
 | 
						|
    $res .= sprintf("%s\n", "-" x $width);
 | 
						|
 | 
						|
    # no inputs? %HoH will be empty
 | 
						|
    return $res unless %HoH;
 | 
						|
 | 
						|
    for my $sec ( keys %HoH ) {
 | 
						|
        next if $sec =~ /SUM:/;
 | 
						|
        next unless defined $HoH{$sec};  # eg, the header line
 | 
						|
        $res .= "$sec\n";
 | 
						|
        foreach (@rows) {
 | 
						|
            $res .= sprintf(" %-18s %14s %14s %14s %14s\n",
 | 
						|
                            $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
 | 
						|
                                $HoH{$sec}{"${_}${dl}${cols[1]}"},
 | 
						|
                                $HoH{$sec}{"${_}${dl}${cols[2]}"},
 | 
						|
                                $HoH{$sec}{"${_}${dl}${cols[3]}"});
 | 
						|
        }
 | 
						|
    }
 | 
						|
    $res .= sprintf("%s\n", "-" x $width);
 | 
						|
    my $sec = 'SUM:';
 | 
						|
    $res .= "$sec\n";
 | 
						|
    foreach (@rows) {
 | 
						|
        $res .= sprintf(" %-18s %14s %14s %14s %14s\n",
 | 
						|
                        $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
 | 
						|
                            $HoH{$sec}{"${_}${dl}${cols[1]}"},
 | 
						|
                            $HoH{$sec}{"${_}${dl}${cols[2]}"},
 | 
						|
                            $HoH{$sec}{"${_}${dl}${cols[3]}"});
 | 
						|
    }
 | 
						|
    $res .= sprintf("%s\n", "-" x $width);
 | 
						|
 | 
						|
    print "<- combine_diffs\n" if $opt_v > 2;
 | 
						|
    return $res;
 | 
						|
} # 1}}}
 | 
						|
sub combine_csv_diffs {                      # {{{1
 | 
						|
    my ($delimiter, $ra_files) = @_;
 | 
						|
    print "-> combine_csv_diffs\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my %sum = ();  # sum{ language } = array of 17 values
 | 
						|
    foreach my $file (@{$ra_files}) {
 | 
						|
        my $IN = open_file('<', $file, 1);
 | 
						|
        if (!defined $IN) {
 | 
						|
            warn "Unable to read $file; ignoring.\n";
 | 
						|
            next;
 | 
						|
        }
 | 
						|
 | 
						|
        my $sec;
 | 
						|
        while (<$IN>) {
 | 
						|
            next if /^Language${delimiter}\s==\sfiles${delimiter}/;
 | 
						|
            chomp;
 | 
						|
            my @words = split(/$delimiter/);
 | 
						|
            my $n_col = scalar(@words);
 | 
						|
            if ($n_col != 18) {
 | 
						|
                warn "combine_csv_diffs(): Parse failure line $. of $file\n";
 | 
						|
                warn "Expected 18 columns, got $n_col\n";
 | 
						|
                die;
 | 
						|
            }
 | 
						|
            my $Lang = $words[0];
 | 
						|
            my @count = map { int($_) } @words[1..16];
 | 
						|
            if (defined $sum{$Lang}) {
 | 
						|
                for (my $i = 0; $i < 16; $i++) {
 | 
						|
                    $sum{$Lang}[$i] += $count[$i];
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                @{$sum{$Lang}} = @count;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $IN->close;
 | 
						|
    }
 | 
						|
 | 
						|
    my @header = ("Language", "== files", "!= files", "+ files", "- files",
 | 
						|
                  "== blank", "!= blank", "+ blank", "- blank", "== comment",
 | 
						|
                  "!= comment", "+ comment", "- comment", "== code",
 | 
						|
                  "!= code", "+ code", "- code", "$URL v $VERSION" );
 | 
						|
 | 
						|
    my $res = join("$delimiter ", @header) . "$delimiter\n";
 | 
						|
    foreach my $Lang (sort keys %sum) {
 | 
						|
        $res .= $Lang . "$delimiter ";
 | 
						|
        for (my $i = 0; $i < 16; $i++) {
 | 
						|
            $res .= $sum{$Lang}[$i] . "$delimiter ";
 | 
						|
        }
 | 
						|
        $res .= "\n";
 | 
						|
    }
 | 
						|
 | 
						|
    print "<- combine_csv_diffs\n" if $opt_v > 2;
 | 
						|
    return $res;
 | 
						|
} # 1}}}
 | 
						|
sub get_time {                               # {{{1
 | 
						|
    if ($HAVE_Time_HiRes) {
 | 
						|
        return Time::HiRes::time();
 | 
						|
    } else {
 | 
						|
        return time();
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub really_is_D {                            # {{{1
 | 
						|
    # Ref bug 131, files ending with .d could be init.d scripts
 | 
						|
    # instead of D language source files.
 | 
						|
    my ($file        , # in
 | 
						|
        $rh_Err      , # in   hash of error codes
 | 
						|
        $raa_errors  , # out
 | 
						|
       ) = @_;
 | 
						|
    print "-> really_is_D($file)\n" if $opt_v > 2;
 | 
						|
    my ($possible_script, $L) = peek_at_first_line($file, $rh_Err, $raa_errors);
 | 
						|
 | 
						|
    print "<- really_is_D($file)\n" if $opt_v > 2;
 | 
						|
    return $possible_script;    # null string if D, otherwise a language
 | 
						|
} # 1}}}
 | 
						|
sub no_autogen_files {                       # {{{1
 | 
						|
    # ref https://github.com/AlDanial/cloc/issues/151
 | 
						|
    my ($print,) = @_;
 | 
						|
    print "-> no_autogen($print)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    # These sometimes created manually?
 | 
						|
    #               acinclude.m4
 | 
						|
    #               configure.ac
 | 
						|
    #               Makefile.am
 | 
						|
 | 
						|
    my @files = qw (
 | 
						|
                    aclocal.m4
 | 
						|
                    announce-gen
 | 
						|
                    autogen.sh
 | 
						|
                    bootstrap
 | 
						|
                    compile
 | 
						|
                    config.guess
 | 
						|
                    config.h.in
 | 
						|
                    config.rpath
 | 
						|
                    config.status
 | 
						|
                    config.sub
 | 
						|
                    configure
 | 
						|
                    configure.in
 | 
						|
                    depcomp
 | 
						|
                    gendocs.sh
 | 
						|
                    gitlog-to-changelog
 | 
						|
                    git-version-gen
 | 
						|
                    gnupload
 | 
						|
                    gnu-web-doc-update
 | 
						|
                    install-sh
 | 
						|
                    libtool
 | 
						|
                    libtool.m4
 | 
						|
                    link-warning.h
 | 
						|
                    ltmain.sh
 | 
						|
                    lt~obsolete.m4
 | 
						|
                    ltoptions.m4
 | 
						|
                    ltsugar.m4
 | 
						|
                    ltversion.in
 | 
						|
                    ltversion.m4
 | 
						|
                    Makefile.in
 | 
						|
                    mdate-sh
 | 
						|
                    missing
 | 
						|
                    mkinstalldirs
 | 
						|
                    test-driver
 | 
						|
                    texinfo.tex
 | 
						|
                    update-copyright
 | 
						|
                    useless-if-before-free
 | 
						|
                    vc-list-files
 | 
						|
                    ylwrap
 | 
						|
                   );
 | 
						|
 | 
						|
    if ($print) {
 | 
						|
        printf "cloc will ignore these %d files with --no-autogen:\n", scalar @files;
 | 
						|
        foreach my $F (@files) {
 | 
						|
            print "    $F\n";
 | 
						|
        }
 | 
						|
        print "Additionally, Go files with '// Code generated by .* DO NOT EDIT.'\n";
 | 
						|
        print "on the first line are ignored.\n";
 | 
						|
    }
 | 
						|
    print "<- no_autogen()\n" if $opt_v > 2;
 | 
						|
    return @files;
 | 
						|
} # 1}}}
 | 
						|
sub load_from_config_file {                  # {{{1
 | 
						|
    # Supports all options except --config itself which would
 | 
						|
    # be pointless.
 | 
						|
    my ($config_file,
 | 
						|
                                                 $rs_by_file             ,
 | 
						|
                                                 $rs_by_file_by_lang     ,
 | 
						|
                                                 $rs_categorized         ,
 | 
						|
                                                 $rs_counted             ,
 | 
						|
                                                 $rs_include_ext         ,
 | 
						|
                                                 $rs_include_lang        ,
 | 
						|
                                                 $rs_include_content     ,
 | 
						|
                                                 $rs_exclude_content     ,
 | 
						|
                                                 $rs_exclude_lang        ,
 | 
						|
                                                 $rs_exclude_dir         ,
 | 
						|
                                                 $rs_exclude_list_file   ,
 | 
						|
                                                 $rs_explain             ,
 | 
						|
                                                 $rs_extract_with        ,
 | 
						|
                                                 $rs_found               ,
 | 
						|
                                                 $rs_count_diff          ,
 | 
						|
                                                 $rs_diff_list_files     ,
 | 
						|
                                                 $rs_diff                ,
 | 
						|
                                                 $rs_diff_alignment      ,
 | 
						|
                                                 $rs_diff_timeout        ,
 | 
						|
                                                 $rs_timeout             ,
 | 
						|
                                                 $rs_html                ,
 | 
						|
                                                 $rs_ignored             ,
 | 
						|
                                                 $rs_quiet               ,
 | 
						|
                                                 $rs_force_lang_def      ,
 | 
						|
                                                 $rs_read_lang_def       ,
 | 
						|
                                                 $rs_show_ext            ,
 | 
						|
                                                 $rs_show_lang           ,
 | 
						|
                                                 $rs_progress_rate       ,
 | 
						|
                                                 $rs_print_filter_stages ,
 | 
						|
                                                 $rs_report_file         ,
 | 
						|
                                                 $ra_script_lang         ,
 | 
						|
                                                 $rs_sdir                ,
 | 
						|
                                                 $rs_skip_uniqueness     ,
 | 
						|
                                                 $rs_strip_code          ,
 | 
						|
                                                 $rs_strip_comments      ,
 | 
						|
                                                 $rs_original_dir        ,
 | 
						|
                                                 $rs_sum_reports         ,
 | 
						|
                                                 $rs_hide_rate           ,
 | 
						|
                                                 $rs_processes           ,
 | 
						|
                                                 $rs_unicode             ,
 | 
						|
                                                 $rs_3                   ,
 | 
						|
                                                 $rs_v                   ,
 | 
						|
                                                 $rs_vcs                 ,
 | 
						|
                                                 $rs_version             ,
 | 
						|
                                                 $rs_write_lang_def      ,
 | 
						|
                                                 $rs_write_lang_def_incl_dup,
 | 
						|
                                                 $rs_xml                 ,
 | 
						|
                                                 $rs_xsl                 ,
 | 
						|
                                                 $ra_force_lang          ,
 | 
						|
                                                 $rs_lang_no_ext         ,
 | 
						|
                                                 $rs_yaml                ,
 | 
						|
                                                 $rs_csv                 ,
 | 
						|
                                                 $rs_csv_delimiter       ,
 | 
						|
                                                 $rs_json                ,
 | 
						|
                                                 $rs_md                  ,
 | 
						|
                                                 $rs_fullpath            ,
 | 
						|
                                                 $rs_match_f             ,
 | 
						|
                                                 $ra_not_match_f         ,
 | 
						|
                                                 $rs_match_d             ,
 | 
						|
                                                 $ra_not_match_d         ,
 | 
						|
                                                 $rs_list_file           ,
 | 
						|
                                                 $rs_help                ,
 | 
						|
                                                 $rs_skip_win_hidden     ,
 | 
						|
                                                 $rs_read_binary_files   ,
 | 
						|
                                                 $rs_sql                 ,
 | 
						|
                                                 $rs_sql_project         ,
 | 
						|
                                                 $rs_sql_append          ,
 | 
						|
                                                 $rs_sql_style           ,
 | 
						|
                                                 $rs_inline              ,
 | 
						|
                                                 $rs_exclude_ext         ,
 | 
						|
                                                 $rs_ignore_whitespace   ,
 | 
						|
                                                 $rs_ignore_case         ,
 | 
						|
                                                 $rs_ignore_case_ext     ,
 | 
						|
                                                 $rs_follow_links        ,
 | 
						|
                                                 $rs_autoconf            ,
 | 
						|
                                                 $rs_sum_one             ,
 | 
						|
                                                 $rs_by_percent          ,
 | 
						|
                                                 $rs_stdin_name          ,
 | 
						|
                                                 $rs_force_on_windows    ,
 | 
						|
                                                 $rs_force_on_unix       ,
 | 
						|
                                                 $rs_show_os             ,
 | 
						|
                                                 $rs_skip_archive        ,
 | 
						|
                                                 $rs_max_file_size       ,
 | 
						|
                                                 $rs_use_sloccount       ,
 | 
						|
                                                 $rs_no_autogen          ,
 | 
						|
                                                 $rs_force_git           ,
 | 
						|
                                                 $rs_strip_str_comments  ,
 | 
						|
                                                 $rs_file_encoding       ,
 | 
						|
                                                 $rs_docstring_as_code   ,
 | 
						|
                                                 $rs_stat                ,
 | 
						|
        ) = @_;
 | 
						|
        # look for runtime configuration file in
 | 
						|
        #    $ENV{'HOME'}/.config/cloc/options.txt         -> POSIX
 | 
						|
        #    $ENV{'APPDATA'} . 'cloc'
 | 
						|
 | 
						|
    print "-> load_from_config_file($config_file)\n" if $opt_v and $opt_v > 2;
 | 
						|
    if (!is_file($config_file)) {
 | 
						|
        print "<- load_from_config_file() (no such file: $config_file)\n" if $opt_v and $opt_v > 2;
 | 
						|
        return;
 | 
						|
    } elsif (!can_read($config_file)) {
 | 
						|
        print "<- load_from_config_file() (unable to read $config_file)\n" if $opt_v and $opt_v > 2;
 | 
						|
        return;
 | 
						|
    }
 | 
						|
    print "Reading options from $config_file.\n" if defined $opt_v;
 | 
						|
 | 
						|
    my $has_force_lang = @{$ra_force_lang};
 | 
						|
    my $has_script_lang = @{$ra_script_lang};
 | 
						|
    my @lines = read_file($config_file);
 | 
						|
    foreach (@lines) {
 | 
						|
        next if /^\s*$/ or /^\s*#/;
 | 
						|
        s/\s*--//;
 | 
						|
        s/^\s+//;
 | 
						|
        if      (!defined ${$rs_by_file}             and /^(by_file|by-file)/)                                { ${$rs_by_file}            = 1;
 | 
						|
        } elsif (!defined ${$rs_by_file_by_lang}     and /^(by_file_by_lang|by-file-by-lang)/)                { ${$rs_by_file_by_lang}    = 1;
 | 
						|
        } elsif (!defined ${$rs_categorized}         and /^categorized(=|\s+)(.*?)$/)                         { ${$rs_categorized}        = $2;
 | 
						|
        } elsif (!defined ${$rs_counted}             and /^counted(=|\s+)(.*?)$/)                             { ${$rs_counted}            = $2;
 | 
						|
        } elsif (!defined ${$rs_include_ext}         and /^(?:include_ext|include-ext)(=|\s+)(.*?)$/)         { ${$rs_include_ext}        = $2;
 | 
						|
        } elsif (!defined ${$rs_include_lang}        and /^(?:include_lang|include-lang)(=|\s+)(.*?)$/)       { ${$rs_include_lang}       = $2;
 | 
						|
        } elsif (!defined ${$rs_include_content}     and /^(?:include_content|include-content)(=|\s+)(.*?)$/) { ${$rs_include_content}    = $2;
 | 
						|
        } elsif (!defined ${$rs_exclude_content}     and /^(?:exclude_content|exclude-content)(=|\s+)(.*?)$/) { ${$rs_exclude_content}    = $2;
 | 
						|
        } elsif (!defined ${$rs_exclude_lang}        and /^(?:exclude_lang|exclude-lang)(=|\s+)(.*?)$/)       { ${$rs_exclude_lang}       = $2;
 | 
						|
        } elsif (!defined ${$rs_exclude_dir}         and /^(?:exclude_dir|exclude-dir)(=|\s+)(.*?)$/)         { ${$rs_exclude_dir}        = $2;
 | 
						|
        } elsif (!defined ${$rs_explain}             and /^explain(=|\s+)(.*?)$/)                             { ${$rs_explain}            = $2;
 | 
						|
        } elsif (!defined ${$rs_extract_with}        and /^(?:extract_with|extract-with)(=|\s+)(.*?)$/)       { ${$rs_extract_with}       = $2;
 | 
						|
        } elsif (!defined ${$rs_found}               and /^found(=|\s+)(.*?)$/)                               { ${$rs_found}              = $2;
 | 
						|
        } elsif (!defined ${$rs_count_diff}          and /^(count_and_diff|count-and-diff)/)                  { ${$rs_count_diff}         = 1;
 | 
						|
        } elsif (!defined ${$rs_diff_list_files}     and /^(diff_list_files|diff-list-files)/)                { ${$rs_diff_list_files}    = 1;
 | 
						|
        } elsif (!defined ${$rs_diff}                and /^diff/)                                             { ${$rs_diff}               = 1;
 | 
						|
        } elsif (!defined ${$rs_diff_alignment}      and /^(?:diff-alignment|diff_alignment)(=|\s+)(.*?)$/)   { ${$rs_diff_alignment}     = $2;
 | 
						|
        } elsif (!defined ${$rs_diff_timeout}        and /^(?:diff-timeout|diff_timeout)(=|\s+)i/)            { ${$rs_diff_timeout}       = $1;
 | 
						|
        } elsif (!defined ${$rs_timeout}             and /^timeout(=|\s+)i/)                                  { ${$rs_timeout}            = $1;
 | 
						|
        } elsif (!defined ${$rs_html}                and /^html/)                                             { ${$rs_html}               = 1;
 | 
						|
        } elsif (!defined ${$rs_ignored}             and /^ignored(=|\s+)(.*?)$/)                             { ${$rs_ignored}            = $2;
 | 
						|
        } elsif (!defined ${$rs_quiet}               and /^quiet/)                                            { ${$rs_quiet}              = 1;
 | 
						|
        } elsif (!defined ${$rs_force_lang_def}      and /^(?:force_lang_def|force-lang-def)(=|\s+)(.*?)$/)   { ${$rs_force_lang_def}     = $2;
 | 
						|
        } elsif (!defined ${$rs_read_lang_def}       and /^(?:read_lang_def|read-lang-def)(=|\s+)(.*?)$/)     { ${$rs_read_lang_def}      = $2;
 | 
						|
        } elsif (!defined ${$rs_progress_rate}       and /^(?:progress_rate|progress-rate)(=|\s+)(\d+)/)      { ${$rs_progress_rate}      = $2;
 | 
						|
        } elsif (!defined ${$rs_print_filter_stages} and /^(print_filter_stages|print-filter-stages)/)        { ${$rs_print_filter_stages}= 1;
 | 
						|
        } elsif (!defined ${$rs_report_file}         and /^(?:report_file|report-file)(=|\s+)(.*?)$/)         { ${$rs_report_file}        = $2;
 | 
						|
        } elsif (!defined ${$rs_report_file}         and /^out(=|\s+)(.*?)$/)                                 { ${$rs_report_file}        = $2;
 | 
						|
        } elsif (!defined ${$rs_sdir}                and /^sdir(=|\s+)(.*?)$/)                                { ${$rs_sdir}               = $2;
 | 
						|
        } elsif (!defined ${$rs_skip_uniqueness}     and /^(skip_uniqueness|skip-uniqueness)/)                { ${$rs_skip_uniqueness}    = 1;
 | 
						|
        } elsif (!defined ${$rs_strip_code}          and /^(?:strip_code|strip-code)(=|\s+)(.*?)$/)           { ${$rs_strip_code}         = $2;
 | 
						|
        } elsif (!defined ${$rs_strip_comments}      and /^(?:strip_comments|strip-comments)(=|\s+)(.*?)$/)   { ${$rs_strip_comments}     = $2;
 | 
						|
        } elsif (!defined ${$rs_original_dir}        and /^(original_dir|original-dir)/)                      { ${$rs_original_dir}       = 1;
 | 
						|
        } elsif (!defined ${$rs_sum_reports}         and /^(sum_reports|sum-reports)/)                        { ${$rs_sum_reports}        = 1;
 | 
						|
        } elsif (!defined ${$rs_hide_rate}           and /^(hid_rate|hide-rate)/)                             { ${$rs_hide_rate}          = 1;
 | 
						|
        } elsif (!defined ${$rs_processes}           and /^processes(=|\s+)(\d+)/)                            { ${$rs_processes}          = $2;
 | 
						|
        } elsif (!defined ${$rs_unicode}             and /^unicode/)                                          { ${$rs_unicode}            = 1;
 | 
						|
        } elsif (!defined ${$rs_3}                   and /^3/)                                                { ${$rs_3}                  = 1;
 | 
						|
        } elsif (!defined ${$rs_vcs}                 and /^vcs(=|\s+)(\S+)/)                                  { ${$rs_vcs}                = $2;
 | 
						|
        } elsif (!defined ${$rs_version}             and /^version/)                                          { ${$rs_version}            = 1;
 | 
						|
        } elsif (!defined ${$rs_write_lang_def}      and /^(?:write_lang_def|write-lang-def)(=|\s+)(.*?)$/)   { ${$rs_write_lang_def}     = $2;
 | 
						|
        } elsif (!defined ${$rs_write_lang_def_incl_dup} and /^(?:write_lang_def_incl_dup|write-lang-def-incl-dup)(=|\s+)(.*?)$/) { ${$rs_write_lang_def_incl_dup} = $2;
 | 
						|
        } elsif (!defined ${$rs_xml}                 and /^xml/)                                              { ${$rs_xml}                = 1;
 | 
						|
        } elsif (!defined ${$rs_xsl}                 and /^xsl(=|\s+)(.*?)$/)                                 { ${$rs_xsl}                = $2;
 | 
						|
        } elsif (!defined ${$rs_lang_no_ext}         and /^(?:lang_no_ext|lang-no-ext)(=|\s+)(.*?)$/)         { ${$rs_lang_no_ext}        = $2;
 | 
						|
        } elsif (!defined ${$rs_yaml}                and /^yaml/)                                             { ${$rs_yaml}               = 1;
 | 
						|
        } elsif (!defined ${$rs_csv}                 and /^csv/)                                              { ${$rs_csv}                = 1;
 | 
						|
        } elsif (!defined ${$rs_csv_delimiter}       and /^(?:csv_delimiter|csv-delimiter)(=|\s+)(.*?)$/)     { ${$rs_csv_delimiter}      = $2;
 | 
						|
        } elsif (!defined ${$rs_json}                and /^json/)                                             { ${$rs_json}               = 1;
 | 
						|
        } elsif (!defined ${$rs_md}                  and /^md/)                                               { ${$rs_md}                 = 1;
 | 
						|
        } elsif (!defined ${$rs_fullpath}            and /^fullpath/)                                         { ${$rs_fullpath}           = 1;
 | 
						|
        } elsif (!defined ${$rs_match_f}             and /^(?:match_f|match-f)(=|\s+)(.*?)$/)                 { ${$rs_match_f}            = $2;
 | 
						|
        } elsif (!        @{$ra_not_match_f}         and /^(?:not_match_f|not-match-f)(=|\s+)(.*?)$/)         { push @{$ra_not_match_f}   , $2;
 | 
						|
        } elsif (!defined ${$rs_match_d}             and /^(?:match_d|match-d)(=|\s+)(.*?)$/)                 { ${$rs_match_d}            = $2;
 | 
						|
        } elsif (!        @{$ra_not_match_d}         and /^(?:not_match_d|not-match-d)(=|\s+)(.*?)$/)         { push @{$ra_not_match_d}   , $2;
 | 
						|
        } elsif (!defined ${$rs_list_file}           and /^(?:list_file|list-file)(=|\s+)(.*?)$/)             { ${$rs_list_file}          = $2;
 | 
						|
        } elsif (!defined ${$rs_help}                and /^help/)                                             { ${$rs_help}               = 1;
 | 
						|
        } elsif (!defined ${$rs_skip_win_hidden}     and /^(skip_win_hidden|skip-win-hidden)/)                { ${$rs_skip_win_hidden}    = 1;
 | 
						|
        } elsif (!defined ${$rs_read_binary_files}   and /^(read_binary_files|read-binary-files)/)            { ${$rs_read_binary_files}  = 1;
 | 
						|
        } elsif (!defined ${$rs_sql}                 and /^sql(=|\s+)(.*?)$/)                                 { ${$rs_sql}                = $2;
 | 
						|
        } elsif (!defined ${$rs_sql_project}         and /^(?:sql_project|sql-project)(=|\s+)(.*?)$/)         { ${$rs_sql_project}        = $2;
 | 
						|
        } elsif (!defined ${$rs_sql_append}          and /^(sql_append|sql-append)/)                          { ${$rs_sql_append}         = 1;
 | 
						|
        } elsif (!defined ${$rs_sql_style}           and /^(?:sql_style|sql-style)(=|\s+)(.*?)$/)             { ${$rs_sql_style}          = $2;
 | 
						|
        } elsif (!defined ${$rs_inline}              and /^inline/)                                           { ${$rs_inline}             = 1;
 | 
						|
        } elsif (!defined ${$rs_exclude_ext}         and /^(?:exclude_ext|exclude-ext)(=|\s+)(.*?)$/)         { ${$rs_exclude_ext}        = $2;
 | 
						|
        } elsif (!defined ${$rs_ignore_whitespace}   and /^(ignore_whitespace|ignore-whitespace)/)            { ${$rs_ignore_whitespace}  = 1;
 | 
						|
        } elsif (!defined ${$rs_ignore_case_ext}     and /^(ignore_case_ext|ignore-case-ext)/)                { ${$rs_ignore_case_ext}    = 1;
 | 
						|
        } elsif (!defined ${$rs_ignore_case}         and /^(ignore_case|ignore-case)/)                        { ${$rs_ignore_case}        = 1;
 | 
						|
        } elsif (!defined ${$rs_follow_links}        and /^(follow_links|follow-links)/)                      { ${$rs_follow_links}       = 1;
 | 
						|
        } elsif (!defined ${$rs_autoconf}            and /^autoconf/)                                         { ${$rs_autoconf}           = 1;
 | 
						|
        } elsif (!defined ${$rs_sum_one}             and /^(sum_one|sum-one)/)                                { ${$rs_sum_one}            = 1;
 | 
						|
        } elsif (!defined ${$rs_by_percent}          and /^(?:by_percent|by-percent)(=|\s+)(.*?)$/)           { ${$rs_by_percent}         = $2;
 | 
						|
        } elsif (!defined ${$rs_stdin_name}          and /^(?:stdin_name|stdin-name)(=|\s+)(.*?)$/)           { ${$rs_stdin_name}         = $2;
 | 
						|
        } elsif (!defined ${$rs_force_on_windows}    and /^windows/)                                          { ${$rs_force_on_windows}   = 1;
 | 
						|
        } elsif (!defined ${$rs_force_on_unix}       and /^unix/)                                             { ${$rs_force_on_unix}      = 1;
 | 
						|
        } elsif (!defined ${$rs_show_os}             and /^(show_os|show-os)/)                                { ${$rs_show_os}            = 1;
 | 
						|
        } elsif (!defined ${$rs_skip_archive}        and /^(?:skip_archive|skip-archive)(=|\s+)(.*?)$/)       { ${$rs_skip_archive}       = $2;
 | 
						|
        } elsif (!defined ${$rs_max_file_size}       and /^(?:max_file_size|max-file-size)(=|\s+)(\d+)/)      { ${$rs_max_file_size}      = $2;
 | 
						|
        } elsif (!defined ${$rs_use_sloccount}       and /^(use_sloccount|use-sloccount)/)                    { ${$rs_use_sloccount}      = 1;
 | 
						|
        } elsif (!defined ${$rs_no_autogen}          and /^(no_autogen|no-autogen)/)                          { ${$rs_no_autogen}         = 1;
 | 
						|
        } elsif (!defined ${$rs_force_git}           and /^git/)                                              { ${$rs_force_git}          = 1;
 | 
						|
        } elsif (!defined ${$rs_exclude_list_file}   and /^(?:exclude_list_file|exclude-list-file)(=|\s+)(.*?)$/)
 | 
						|
                                                                   { ${$rs_exclude_list_file}  = $2;
 | 
						|
        } elsif (!defined ${$rs_v} and /^(verbose|v)((=|\s+)(\d+))?/) {
 | 
						|
            if (!defined $4) { ${$rs_v} =  0; }
 | 
						|
            else             { ${$rs_v} = $4; }
 | 
						|
        } elsif (!$has_script_lang and /^(?:script_lang|script-lang)(=|\s+)(.*?)$/)         {
 | 
						|
                                                            push @{$ra_script_lang}          , $2;
 | 
						|
        } elsif (!$has_force_lang and /^(?:force_lang|force-lang)(=|\s+)(.*?)$/)           {
 | 
						|
                                                            push @{$ra_force_lang}           , $2;
 | 
						|
        } elsif (!defined ${$rs_show_ext}          and /^(show_ext|show-ext)((=|\s+)(.*))?$/)  {
 | 
						|
            if (!defined $4) { ${$rs_show_ext} =  0; }
 | 
						|
            else             { ${$rs_show_ext} = $4; }
 | 
						|
        } elsif (!defined ${$rs_show_lang}         and /^(show_lang|show-lang)((=|\s+)(.*))?s/){
 | 
						|
            if (!defined $4) { ${$rs_show_lang} =  0; }
 | 
						|
            else             { ${$rs_show_lang} = $4; }
 | 
						|
        } elsif (!defined ${$rs_strip_str_comments}  and /^(strip_str_comments|strip-str-comments)/)     { ${$rs_strip_str_comments} = 1;
 | 
						|
        } elsif (!defined ${$rs_file_encoding}       and /^(?:file_encoding|file-encoding)(=|\s+)(\S+)/) { ${$rs_file_encoding}      = $2;
 | 
						|
        } elsif (!defined ${$rs_docstring_as_code}   and /^(docstring_as_code|docstring-as-code)/)       { ${$rs_docstring_as_code}  = 1;
 | 
						|
        } elsif (!defined ${$rs_stat}                and /stat/)                                         { ${$rs_stat}               = 1;
 | 
						|
        }
 | 
						|
 | 
						|
    }
 | 
						|
} # 1}}}
 | 
						|
sub trick_pp_packer_encode {                 # {{{1
 | 
						|
    use Encode;
 | 
						|
    # PAR::Packer gives 'Unknown PerlIO layer "encoding"' unless it is
 | 
						|
    # forced into using this module.
 | 
						|
    my ($OUT, $JunkFile) = tempfile(UNLINK => 1);  # delete on exit
 | 
						|
    open($OUT, "> :encoding(utf8)", $JunkFile);
 | 
						|
    close($OUT);
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub really_is_smarty {                       # {{{1
 | 
						|
    # Given filename, returns TRUE if its contents look like Smarty template
 | 
						|
    my ($filename, ) = @_;
 | 
						|
 | 
						|
    print "-> really_is_smarty($filename)\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my @lines = read_file($filename);
 | 
						|
 | 
						|
    my $points = 0;
 | 
						|
    foreach my $L (@lines) {
 | 
						|
        if (($L =~ /\{(if|include)\s/) or
 | 
						|
            ($L =~ /\{\/if\}/)         or
 | 
						|
            ($L =~ /(\{\*|\*\})/)      or
 | 
						|
            ($L =~ /\{\$\w/)) {
 | 
						|
            ++$points;
 | 
						|
        }
 | 
						|
        last if $points >= 2;
 | 
						|
    }
 | 
						|
    print "<- really_is_smarty(points=$points)\n" if $opt_v > 2;
 | 
						|
    return $points >= 2;
 | 
						|
} # 1}}}
 | 
						|
sub check_alternate_config_files {           # {{{1
 | 
						|
    my ($list_file, $exclude_list_file, $read_lang_def,
 | 
						|
        $force_lang_def, $diff_list_file, ) = @_;
 | 
						|
    my $found_it = "";
 | 
						|
    foreach my $file ($list_file,
 | 
						|
                      $exclude_list_file,
 | 
						|
                      $read_lang_def,
 | 
						|
                      $force_lang_def,
 | 
						|
                      $diff_list_file ) {
 | 
						|
        next unless defined $file;
 | 
						|
        my $dir = dirname $file;
 | 
						|
        next unless can_read($dir) and is_dir($dir);
 | 
						|
        my $bn = basename $config_file;
 | 
						|
        if (can_read("$dir/$bn")) {
 | 
						|
            $found_it = "$dir/$bn";
 | 
						|
            print "Using configuration file $found_it\n" if $opt_v;
 | 
						|
            last;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return $found_it;
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub write_null_results {                     # {{{
 | 
						|
    my ($json, $xml, $report_file,) = @_;
 | 
						|
    print "-> write_null_results\n" if $opt_v > 2;
 | 
						|
    if ((defined $json) or (defined $xml)) {
 | 
						|
        my $line = "";
 | 
						|
        if (defined $json) {
 | 
						|
            $line = "{}";
 | 
						|
        } else {
 | 
						|
            $line = '<?xml version="1.0" encoding="UTF-8"?><results/>';
 | 
						|
        }
 | 
						|
        if (defined $report_file) {
 | 
						|
            open OUT, ">$report_file" or die "Cannot write to $report_file $!\n";
 | 
						|
            print OUT "$line\n";
 | 
						|
            close OUT;
 | 
						|
        } else {
 | 
						|
            print "$line\n";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    print "<- write_null_results\n" if $opt_v > 2;
 | 
						|
} # }}}
 | 
						|
sub glob2regex {                             # {{{
 | 
						|
    # convert simple xpath-style glob pattern to a regex
 | 
						|
    my $globstr = shift;
 | 
						|
    my $re = $globstr;
 | 
						|
    $re =~ s{^["']}{};
 | 
						|
    $re =~ s{^\.\/}{};
 | 
						|
    $re =~ s{["']$}{};
 | 
						|
    $re =~ s{\.}{\\.}g;
 | 
						|
    $re =~ s{\*\*}{\cx}g;  # ctrl x  = .*?
 | 
						|
    $re =~ s{\*}{\cy}g;    # ctrl y = [^/]*
 | 
						|
    $re =~ s{\cx}{.*?}g;
 | 
						|
    $re =~ s{\cy}{[^/]*}g;
 | 
						|
    return '^' . $re . '$';
 | 
						|
} # }}}
 | 
						|
sub load_json {                              # {{{1
 | 
						|
    #
 | 
						|
    # Load a cloc-generated JSON string into %contents
 | 
						|
    #   $contents{filename}{blank|comment|code|language} = value
 | 
						|
    # then print in a variety of formats.
 | 
						|
    #
 | 
						|
    my ($json_string, ) = @_;
 | 
						|
    print "-> load_json()\n" if $opt_v > 2;
 | 
						|
 | 
						|
    my %contents = ();
 | 
						|
    my $heading = undef;
 | 
						|
    foreach (split /\n/, $json_string) {
 | 
						|
        if (/^{?"(.*?)"/) {
 | 
						|
            $heading = $1;
 | 
						|
        } else {
 | 
						|
            if (/^\s+"(.*?)"\s*:\s+(\d+(\.\d+)?)\b/) {
 | 
						|
                # numeric value
 | 
						|
                $contents{$heading}{$1} = $2;
 | 
						|
            } elsif (/^\s+"(.*?)"\s*:\s+"(.*?)"/) {
 | 
						|
                $contents{$heading}{$1} = $2;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    my $url = $contents{'header'}{'cloc_url'};
 | 
						|
    my $ver = $contents{'header'}{'cloc_version'};
 | 
						|
    my $sec = $contents{'header'}{'elapsed_seconds'};
 | 
						|
    my $n_file = $contents{'header'}{'n_files'};
 | 
						|
    my $n_line = $contents{'header'}{'n_lines'};
 | 
						|
    $sec = $sec == 0 ? 1.0e-3 : $sec;
 | 
						|
    my $header = sprintf "%s v %s T=%.2f s (%.1f files/s, %.1f lines/s)",
 | 
						|
                          $url, $ver, $sec, $n_file/$sec, $n_line/$sec;
 | 
						|
    delete $contents{'header'};
 | 
						|
    delete $contents{'SUM'};
 | 
						|
 | 
						|
    my @file_list = (sort { $contents{$b}{'code'} <=>
 | 
						|
                            $contents{$a}{'code'} } keys %contents );
 | 
						|
#die Dumper(\%contents);
 | 
						|
    # Determine column widths for output
 | 
						|
    my $file_len = 0;
 | 
						|
    my $lang_len = 0;
 | 
						|
    foreach my $file (keys %contents) {
 | 
						|
        my $flen = length $file;
 | 
						|
        my $llen = length $contents{$file}{'language'};
 | 
						|
        $file_len = $file_len > $flen ? $file_len : $flen;
 | 
						|
        $lang_len = $lang_len > $llen ? $lang_len : $llen;
 | 
						|
    }
 | 
						|
    print "<- load_json()\n" if $opt_v > 2;
 | 
						|
    return $file_len, $lang_len, $header, %contents;
 | 
						|
}
 | 
						|
# 1}}}
 | 
						|
sub print_format_n {                         # {{{1
 | 
						|
    # by file with
 | 
						|
    # format 1 : Language | files | blank | comment | code
 | 
						|
    # format 2 : Language | files | blank | comment | code | total
 | 
						|
    # format 3 : File | Language | blank | comment | code
 | 
						|
    # format 4 : File | blank | comment | code | total
 | 
						|
    # format 5 : File | Language | blank | comment | code | total
 | 
						|
    my ($format, $file_len, $lang_len, $header, %contents) = @_;
 | 
						|
    print "-> print_format_n($format)\n" if $opt_v > 2;
 | 
						|
    my @prt_lines = ();
 | 
						|
 | 
						|
    # 8 = characters in "Language"
 | 
						|
    $lang_len = max(8, $lang_len);
 | 
						|
    my %str_fmt = (
 | 
						|
        1 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s\n", $lang_len),
 | 
						|
        2 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s  %%7s\n", $lang_len),
 | 
						|
        3 => sprintf("%%-%ds  %%-%ds  %%7s  %%7s  %%7s\n", $file_len, $lang_len),
 | 
						|
        4 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s\n", $file_len),
 | 
						|
        5 => sprintf("%%-%ds  %%-%ds  %%7s  %%7s  %%7s  %%7s\n", $file_len, $lang_len),
 | 
						|
    );
 | 
						|
    my %val_fmt = (
 | 
						|
        1 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d\n", $lang_len),
 | 
						|
        2 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d  %%7d\n", $lang_len),
 | 
						|
        3 => sprintf("%%-%ds  %%-%ds  %%7d  %%7d  %%7d\n", $file_len, $lang_len),
 | 
						|
        4 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d\n", $file_len),
 | 
						|
        5 => sprintf("%%-%ds  %%-%ds  %%7d  %%7d  %%7d  %%7d\n", $file_len, $lang_len),
 | 
						|
    );
 | 
						|
    my %language = ();
 | 
						|
    foreach my $file (keys %contents) {
 | 
						|
        my $lang = $contents{$file}{'language'};
 | 
						|
        $language{$lang}{'files'} += 1;
 | 
						|
        foreach my $category ('blank', 'comment', 'code',) {
 | 
						|
            $language{$lang}{$category} += $contents{$file}{$category};
 | 
						|
            $language{$lang}{'total'}   += $contents{$file}{$category};
 | 
						|
        }
 | 
						|
    }
 | 
						|
    my @file_list = (sort { $contents{$b}{'code'} <=>
 | 
						|
                            $contents{$a}{'code'} } keys %contents );
 | 
						|
    my @lang_list = (sort { $language{$b}{'code'} <=>
 | 
						|
                            $language{$a}{'code'} } keys %language );
 | 
						|
 | 
						|
    my %hyphens = (
 | 
						|
        1 => "-" x ($lang_len + 4*9),
 | 
						|
        2 => "-" x ($lang_len + 5*9),
 | 
						|
        3 => "-" x ($lang_len + $file_len + 2 + 3*9),
 | 
						|
        4 => "-" x ($file_len + 4*9),
 | 
						|
        5 => "-" x ($lang_len + $file_len + 2 + 4*9),
 | 
						|
    );
 | 
						|
    my %col_headings = (
 | 
						|
        1 => ["Language", "files", "blank", "comment", "code"],
 | 
						|
        2 => ["Language", "files", "blank", "comment", "code", "Total"],
 | 
						|
        3 => ["File", "Language", "blank", "comment", "code"],
 | 
						|
        4 => ["File", "blank", "comment", "code", "Total"],
 | 
						|
        5 => ["File", "Language", "blank", "comment", "code", "Total"],
 | 
						|
    );
 | 
						|
 | 
						|
    push @prt_lines, "$header\n";
 | 
						|
    push @prt_lines, "$hyphens{$format}\n";
 | 
						|
    push @prt_lines, sprintf $str_fmt{$format}, @{$col_headings{$format}};
 | 
						|
    push @prt_lines, "$hyphens{$format}\n";
 | 
						|
    my ($n_files, $n_blank, $n_comment, $n_code, $n_total) = (0, 0, 0, 0, 0);
 | 
						|
    my @out;
 | 
						|
    if ($format < 3) {
 | 
						|
        # by language
 | 
						|
        foreach my $lang (@lang_list) {
 | 
						|
            my ($nF, $nB, $nCm, $nCo) = ($language{$lang}{'files'},
 | 
						|
                                         $language{$lang}{'blank'},
 | 
						|
                                         $language{$lang}{'comment'},
 | 
						|
                                         $language{$lang}{'code'});
 | 
						|
            if      ($format == 1) {
 | 
						|
                @out = ($lang, $nF, $nB, $nCm, $nCo);
 | 
						|
            } else {
 | 
						|
                @out = ($lang, $nF, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | 
						|
            }
 | 
						|
            push @prt_lines, sprintf $val_fmt{$format}, @out;
 | 
						|
            $n_files   += $nF;
 | 
						|
            $n_blank   += $nB;
 | 
						|
            $n_comment += $nCm;
 | 
						|
            $n_code    += $nCo;
 | 
						|
            $n_total   += $nB + $nCm + $nCo;
 | 
						|
        }
 | 
						|
    } else {
 | 
						|
        # by file
 | 
						|
        foreach my $file (@file_list) {
 | 
						|
            my ($nB, $nCm, $nCo) = ($contents{$file}{'blank'},
 | 
						|
                                    $contents{$file}{'comment'},
 | 
						|
                                    $contents{$file}{'code'});
 | 
						|
            my $lang = $contents{$file}{'language'};
 | 
						|
            if      ($format == 1) {
 | 
						|
            } elsif ($format == 3) {
 | 
						|
                @out = ($file, $lang, $nB, $nCm, $nCo);
 | 
						|
            } elsif ($format == 4) {
 | 
						|
                @out = ($file, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | 
						|
            } else {
 | 
						|
                @out = ($file, $lang, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | 
						|
            }
 | 
						|
            push @prt_lines, sprintf $val_fmt{$format}, @out;
 | 
						|
            $n_blank   += $nB;
 | 
						|
            $n_comment += $nCm;
 | 
						|
            $n_code    += $nCo;
 | 
						|
            $n_total   += $nB + $nCm + $nCo;
 | 
						|
        }
 | 
						|
    }
 | 
						|
    push @prt_lines, "$hyphens{$format}\n";
 | 
						|
    if (scalar @file_list > 1) {
 | 
						|
        if      ($format == 1) {
 | 
						|
            @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code );
 | 
						|
        } elsif ($format == 2) {
 | 
						|
            @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code, $n_total );
 | 
						|
        } elsif ($format == 3) {
 | 
						|
            @out = ( "SUM", " ", $n_blank, $n_comment, $n_code );
 | 
						|
        } elsif ($format == 4) {
 | 
						|
            @out = ( "SUM", $n_blank, $n_comment, $n_code, $n_total );
 | 
						|
        } else {
 | 
						|
            @out = ( "SUM", " ", $n_blank, $n_comment, $n_code, $n_total );
 | 
						|
        }
 | 
						|
        push @prt_lines, sprintf $val_fmt{$format}, @out;
 | 
						|
        push @prt_lines, "$hyphens{$format}\n";
 | 
						|
    }
 | 
						|
    return @prt_lines;
 | 
						|
    print "<- print_format_n()\n" if $opt_v > 2;
 | 
						|
} # 1}}}
 | 
						|
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
 | 
						|
my %php_files    = ();  # really_is_php()
 | 
						|
sub really_is_pascal {                       # {{{1
 | 
						|
# Given filename, returns TRUE if its contents really are Pascal.
 | 
						|
 | 
						|
# This isn't as obvious as it seems.
 | 
						|
# Many ".p" files are Perl files
 | 
						|
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
 | 
						|
# others are C extractions
 | 
						|
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
 | 
						|
# and some files in linuxconf).
 | 
						|
# However, test files in "p2c" really are Pascal, for example.
 | 
						|
 | 
						|
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
 | 
						|
# is actually C code.  The heuristics determine that they're not Pascal,
 | 
						|
# but because it ends in ".p" it's not counted as C code either.
 | 
						|
# I believe this is actually correct behavior, because frankly it
 | 
						|
# looks like it's automatically generated (it's a bitmap expressed as code).
 | 
						|
# Rather than guess otherwise, we don't include it in a list of
 | 
						|
# source files.  Let's face it, someone who creates C files ending in ".p"
 | 
						|
# and expects them to be counted by default as C files in SLOCCount needs
 | 
						|
# their head examined.  I suggest examining their head
 | 
						|
# with a sucker rod (see syslogd(8) for more on sucker rods).
 | 
						|
 | 
						|
# This heuristic counts as Pascal such files such as:
 | 
						|
#  /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
 | 
						|
# Which is hand-generated.  We don't count woven documents now anyway,
 | 
						|
# so this is justifiable.
 | 
						|
 | 
						|
 my $filename = shift;
 | 
						|
 chomp($filename);
 | 
						|
 | 
						|
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
 | 
						|
# (ignoring {...} and (*...*) comments):
 | 
						|
# 1. "^..program NAME" or "^..unit NAME",
 | 
						|
# 2. "procedure", "function", "^..interface", or "^..implementation",
 | 
						|
# 3. a "begin", and
 | 
						|
# 4. it ends with "end.",
 | 
						|
#
 | 
						|
# Or it has all of the following:
 | 
						|
# 1. "^..module NAME" and
 | 
						|
# 2. it ends with "end.".
 | 
						|
#
 | 
						|
# Or it has all of the following:
 | 
						|
# 1. "^..program NAME",
 | 
						|
# 2. a "begin", and
 | 
						|
# 3. it ends with "end.".
 | 
						|
#
 | 
						|
# The "end." requirements in particular filter out non-Pascal.
 | 
						|
#
 | 
						|
# Note (jgb): this does not detect Pascal main files in fpc, like
 | 
						|
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
 | 
						|
# it
 | 
						|
 | 
						|
 my $is_pascal = 0;      # Value to determine.
 | 
						|
 | 
						|
 my $has_program = 0;
 | 
						|
 my $has_unit = 0;
 | 
						|
 my $has_module = 0;
 | 
						|
 my $has_procedure_or_function = 0;
 | 
						|
 my $found_begin = 0;
 | 
						|
 my $found_terminating_end = 0;
 | 
						|
 my $has_begin = 0;
 | 
						|
 | 
						|
 my $PASCAL_FILE = open_file('<', $filename, 0);
 | 
						|
 die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
 | 
						|
 while(<$PASCAL_FILE>) {
 | 
						|
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
 | 
						|
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
 | 
						|
   if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
 | 
						|
   if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
 | 
						|
   if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
 | 
						|
   if (m/\bprocedure\b/i)         { $has_procedure_or_function = 1; }
 | 
						|
   if (m/\bfunction\b/i)          { $has_procedure_or_function = 1; }
 | 
						|
   if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
 | 
						|
   if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
 | 
						|
   if (m/\bbegin\b/i) { $has_begin = 1; }
 | 
						|
   # Originally I said:
 | 
						|
   # "This heuristic fails if there are multi-line comments after
 | 
						|
   # "end."; I haven't seen that in real Pascal programs:"
 | 
						|
   # But jgb found there are a good quantity of them in Debian, specially in
 | 
						|
   # fpc (at the end of a lot of files there is a multiline comment
 | 
						|
   # with the changelog for the file).
 | 
						|
   # Therefore, assume Pascal if "end." appears anywhere in the file.
 | 
						|
   if (m/end\.\s*$/i) {$found_terminating_end = 1;}
 | 
						|
#   elsif (m/\S/) {$found_terminating_end = 0;}
 | 
						|
 }
 | 
						|
 close($PASCAL_FILE);
 | 
						|
 | 
						|
 # Okay, we've examined the entire file looking for clues;
 | 
						|
 # let's use those clues to determine if it's really Pascal:
 | 
						|
 | 
						|
 if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
 | 
						|
     $has_begin && $found_terminating_end ) ||
 | 
						|
      ( $has_module && $found_terminating_end ) ||
 | 
						|
      ( $has_program && $has_begin && $found_terminating_end ) )
 | 
						|
          {$is_pascal = 1;}
 | 
						|
 | 
						|
 return $is_pascal;
 | 
						|
} # 1}}}
 | 
						|
sub really_is_incpascal {                    # {{{1
 | 
						|
# Given filename, returns TRUE if its contents really are Pascal.
 | 
						|
# For .inc files (mainly seen in fpc)
 | 
						|
 | 
						|
 my $filename = shift;
 | 
						|
 chomp($filename);
 | 
						|
 | 
						|
# The heuristic is as follows: it is Pascal if any of the following:
 | 
						|
# 1. really_is_pascal returns true
 | 
						|
# 2. Any usual reserved word is found (program, unit, const, begin...)
 | 
						|
 | 
						|
 # If the general routine for Pascal files works, we have it
 | 
						|
 if (really_is_pascal($filename)) {
 | 
						|
   return 1;
 | 
						|
 }
 | 
						|
 | 
						|
 my $is_pascal = 0;      # Value to determine.
 | 
						|
 my $found_begin = 0;
 | 
						|
 | 
						|
 my $PASCAL_FILE = open_file('<', $filename, 0);
 | 
						|
 die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
 | 
						|
 while(<$PASCAL_FILE>) {
 | 
						|
   s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
 | 
						|
   s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
 | 
						|
   if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
 | 
						|
   if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
 | 
						|
   if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
 | 
						|
   if (m/\bprocedure\b/i)         {$is_pascal = 1; }
 | 
						|
   if (m/\bfunction\b/i)          {$is_pascal = 1; }
 | 
						|
   if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
 | 
						|
   if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
 | 
						|
   if (m/\bconstant\s+/i)         {$is_pascal=1;}
 | 
						|
   if (m/\bbegin\b/i) { $found_begin = 1; }
 | 
						|
   if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
 | 
						|
   if ($is_pascal) {
 | 
						|
     last;
 | 
						|
   }
 | 
						|
 }
 | 
						|
 | 
						|
 close($PASCAL_FILE);
 | 
						|
 return $is_pascal;
 | 
						|
} # 1}}}
 | 
						|
sub really_is_php {                          # {{{1
 | 
						|
# Given filename, returns TRUE if its contents really is php.
 | 
						|
 | 
						|
 my $filename = shift;
 | 
						|
 chomp($filename);
 | 
						|
 | 
						|
 my $is_php = 0;      # Value to determine.
 | 
						|
 # Need to find a matching pair of surrounds, with ending after beginning:
 | 
						|
 my $normal_surround = 0;  # <?; bit 0 = <?, bit 1 = ?>
 | 
						|
 my $script_surround = 0;  # <script..>; bit 0 = <script language="php">
 | 
						|
 my $asp_surround = 0;     # <%; bit 0 = <%, bit 1 = %>
 | 
						|
 | 
						|
 # Return cached result, if available:
 | 
						|
 if ($php_files{$filename}) { return $php_files{$filename};}
 | 
						|
 | 
						|
 my $PHP_FILE = open_file('<', $filename, 0);
 | 
						|
 die "Can't open $filename to determine if it's php.\n" if !defined $PHP_FILE;
 | 
						|
 while(<$PHP_FILE>) {
 | 
						|
   if (m/\<\?/)                           { $normal_surround |= 1; }
 | 
						|
   if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
 | 
						|
   if (m/\<script.*language="?php"?/i)    { $script_surround |= 1; }
 | 
						|
   if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
 | 
						|
   if (m/\<\%/)                           { $asp_surround |= 1; }
 | 
						|
   if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
 | 
						|
 }
 | 
						|
 close($PHP_FILE);
 | 
						|
 | 
						|
 if ( ($normal_surround == 3) || ($script_surround == 3) ||
 | 
						|
      ($asp_surround == 3)) {
 | 
						|
   $is_php = 1;
 | 
						|
 }
 | 
						|
 | 
						|
 $php_files{$filename} = $is_php; # Store result in cache.
 | 
						|
 | 
						|
 return $is_php;
 | 
						|
} # 1}}}
 | 
						|
# vendored modules
 | 
						|
sub Install_Regexp_Common {                  # {{{1
 | 
						|
    # Installs portions of Damian Conway's & Abigail's Regexp::Common
 | 
						|
    # module, version 2017060201 into a temporary directory for the
 | 
						|
    # duration of this run.
 | 
						|
    my %Regexp_Common_Contents = ();
 | 
						|
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
 | 
						|
package Regexp::Common;
 | 
						|
 | 
						|
use 5.10.0;
 | 
						|
use strict;
 | 
						|
 | 
						|
use warnings;
 | 
						|
no  warnings 'syntax';
 | 
						|
 | 
						|
our $VERSION = '2017060201';
 | 
						|
our %RE;
 | 
						|
our %sub_interface;
 | 
						|
our $AUTOLOAD;
 | 
						|
 | 
						|
 | 
						|
sub _croak {
 | 
						|
    require Carp;
 | 
						|
    goto &Carp::croak;
 | 
						|
}
 | 
						|
 | 
						|
sub _carp {
 | 
						|
    require Carp;
 | 
						|
    goto &Carp::carp;
 | 
						|
}
 | 
						|
 | 
						|
sub new {
 | 
						|
    my ($class, @data) = @_;
 | 
						|
    my %self;
 | 
						|
    tie %self, $class, @data;
 | 
						|
    return \%self;
 | 
						|
}
 | 
						|
 | 
						|
sub TIEHASH {
 | 
						|
    my ($class, @data) = @_;
 | 
						|
    bless \@data, $class;
 | 
						|
}
 | 
						|
 | 
						|
sub FETCH {
 | 
						|
    my ($self, $extra) = @_;
 | 
						|
    return bless ref($self)->new(@$self, $extra), ref($self);
 | 
						|
}
 | 
						|
 | 
						|
my %imports = map {$_ => "Regexp::Common::$_"}
 | 
						|
              qw /balanced CC     comment   delimited lingua list
 | 
						|
                  net      number profanity SEN       URI    whitespace
 | 
						|
                  zip/;
 | 
						|
 | 
						|
sub import {
 | 
						|
    shift;  # Shift off the class.
 | 
						|
    tie %RE, __PACKAGE__;
 | 
						|
    {
 | 
						|
        no strict 'refs';
 | 
						|
        *{caller() . "::RE"} = \%RE;
 | 
						|
    }
 | 
						|
 | 
						|
    my $saw_import;
 | 
						|
    my $no_defaults;
 | 
						|
    my %exclude;
 | 
						|
    foreach my $entry (grep {!/^RE_/} @_) {
 | 
						|
        if ($entry eq 'pattern') {
 | 
						|
            no strict 'refs';
 | 
						|
            *{caller() . "::pattern"} = \&pattern;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        # This used to prevent $; from being set. We still recognize it,
 | 
						|
        # but we won't do anything.
 | 
						|
        if ($entry eq 'clean') {
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($entry eq 'no_defaults') {
 | 
						|
            $no_defaults ++;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if (my $module = $imports {$entry}) {
 | 
						|
            $saw_import ++;
 | 
						|
            eval "require $module;";
 | 
						|
            die $@ if $@;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        if ($entry =~ /^!(.*)/ && $imports {$1}) {
 | 
						|
            $exclude {$1} ++;
 | 
						|
            next;
 | 
						|
        }
 | 
						|
        # As a last resort, try to load the argument.
 | 
						|
        my $module = $entry =~ /^Regexp::Common/
 | 
						|
                            ? $entry
 | 
						|
                            : "Regexp::Common::" . $entry;
 | 
						|
        eval "require $module;";
 | 
						|
        die $@ if $@;
 | 
						|
    }
 | 
						|
 | 
						|
    unless ($saw_import || $no_defaults) {
 | 
						|
        foreach my $module (values %imports) {
 | 
						|
            next if $exclude {$module};
 | 
						|
            eval "require $module;";
 | 
						|
            die $@ if $@;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    my %exported;
 | 
						|
    foreach my $entry (grep {/^RE_/} @_) {
 | 
						|
        if ($entry =~ /^RE_(\w+_)?ALL$/) {
 | 
						|
            my $m  = defined $1 ? $1 : "";
 | 
						|
            my $re = qr /^RE_${m}.*$/;
 | 
						|
            while (my ($sub, $interface) = each %sub_interface) {
 | 
						|
                next if $exported {$sub};
 | 
						|
                next unless $sub =~ /$re/;
 | 
						|
                {
 | 
						|
                    no strict 'refs';
 | 
						|
                    *{caller() . "::$sub"} = $interface;
 | 
						|
                }
 | 
						|
                $exported {$sub} ++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            next if $exported {$entry};
 | 
						|
            _croak "Can't export unknown subroutine &$entry"
 | 
						|
                unless $sub_interface {$entry};
 | 
						|
            {
 | 
						|
                no strict 'refs';
 | 
						|
                *{caller() . "::$entry"} = $sub_interface {$entry};
 | 
						|
            }
 | 
						|
            $exported {$entry} ++;
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
 | 
						|
 | 
						|
sub DESTROY {}
 | 
						|
 | 
						|
my %cache;
 | 
						|
 | 
						|
my $fpat = qr/^(-\w+)/;
 | 
						|
 | 
						|
sub _decache {
 | 
						|
        my @args = @{tied %{$_[0]}};
 | 
						|
        my @nonflags = grep {!/$fpat/} @args;
 | 
						|
        my $cache = get_cache(@nonflags);
 | 
						|
        _croak "Can't create unknown regex: \$RE{"
 | 
						|
            . join("}{",@args) . "}"
 | 
						|
                unless exists $cache->{__VAL__};
 | 
						|
        _croak "Perl $] does not support the pattern "
 | 
						|
            . "\$RE{" . join("}{",@args)
 | 
						|
            . "}.\nYou need Perl $cache->{__VAL__}{version} or later"
 | 
						|
                unless ($cache->{__VAL__}{version}||0) <= $];
 | 
						|
        my %flags = ( %{$cache->{__VAL__}{default}},
 | 
						|
                      map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
 | 
						|
                          : /$fpat/           ? ($1 => undef)
 | 
						|
                          :                     ()
 | 
						|
                          } @args);
 | 
						|
        $cache->{__VAL__}->_clone_with(\@args, \%flags);
 | 
						|
}
 | 
						|
 | 
						|
use overload q{""} => \&_decache;
 | 
						|
 | 
						|
 | 
						|
sub get_cache {
 | 
						|
        my $cache = \%cache;
 | 
						|
        foreach (@_) {
 | 
						|
                $cache = $cache->{$_}
 | 
						|
                      || ($cache->{$_} = {});
 | 
						|
        }
 | 
						|
        return $cache;
 | 
						|
}
 | 
						|
 | 
						|
sub croak_version {
 | 
						|
        my ($entry, @args) = @_;
 | 
						|
}
 | 
						|
 | 
						|
sub pattern {
 | 
						|
        my %spec = @_;
 | 
						|
        _croak 'pattern() requires argument: name => [ @list ]'
 | 
						|
                unless $spec{name} && ref $spec{name} eq 'ARRAY';
 | 
						|
        _croak 'pattern() requires argument: create => $sub_ref_or_string'
 | 
						|
                unless $spec{create};
 | 
						|
 | 
						|
        if (ref $spec{create} ne "CODE") {
 | 
						|
                my $fixed_str = "$spec{create}";
 | 
						|
                $spec{create} = sub { $fixed_str }
 | 
						|
        }
 | 
						|
 | 
						|
        my @nonflags;
 | 
						|
        my %default;
 | 
						|
        foreach ( @{$spec{name}} ) {
 | 
						|
                if (/$fpat=(.*)/) {
 | 
						|
                        $default{$1} = $2;
 | 
						|
                }
 | 
						|
                elsif (/$fpat\s*$/) {
 | 
						|
                        $default{$1} = undef;
 | 
						|
                }
 | 
						|
                else {
 | 
						|
                        push @nonflags, $_;
 | 
						|
                }
 | 
						|
        }
 | 
						|
 | 
						|
        my $entry = get_cache(@nonflags);
 | 
						|
 | 
						|
        if ($entry->{__VAL__}) {
 | 
						|
                _carp "Overriding \$RE{"
 | 
						|
                   . join("}{",@nonflags)
 | 
						|
                   . "}";
 | 
						|
        }
 | 
						|
 | 
						|
        $entry->{__VAL__} = bless {
 | 
						|
                                create  => $spec{create},
 | 
						|
                                match   => $spec{match} || \&generic_match,
 | 
						|
                                subs    => $spec{subs}  || \&generic_subs,
 | 
						|
                                version => $spec{version},
 | 
						|
                                default => \%default,
 | 
						|
                            }, 'Regexp::Common::Entry';
 | 
						|
 | 
						|
        foreach (@nonflags) {s/\W/X/g}
 | 
						|
        my $subname = "RE_" . join ("_", @nonflags);
 | 
						|
        $sub_interface{$subname} = sub {
 | 
						|
                push @_ => undef if @_ % 2;
 | 
						|
                my %flags = @_;
 | 
						|
                my $pat = $spec{create}->($entry->{__VAL__},
 | 
						|
                               {%default, %flags}, \@nonflags);
 | 
						|
                if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
 | 
						|
                else { $pat =~ s/\Q(?k:/(?:/g; }
 | 
						|
                return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
 | 
						|
        };
 | 
						|
 | 
						|
        return 1;
 | 
						|
}
 | 
						|
 | 
						|
sub generic_match {$_ [1] =~  /$_[0]/}
 | 
						|
sub generic_subs  {$_ [1] =~ s/$_[0]/$_[2]/}
 | 
						|
 | 
						|
sub matches {
 | 
						|
        my ($self, $str) = @_;
 | 
						|
        my $entry = $self -> _decache;
 | 
						|
        $entry -> {match} -> ($entry, $str);
 | 
						|
}
 | 
						|
 | 
						|
sub subs {
 | 
						|
        my ($self, $str, $newstr) = @_;
 | 
						|
        my $entry = $self -> _decache;
 | 
						|
        $entry -> {subs} -> ($entry, $str, $newstr);
 | 
						|
        return $str;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
package Regexp::Common::Entry;
 | 
						|
# use Carp;
 | 
						|
 | 
						|
use overload
 | 
						|
    q{""} => sub {
 | 
						|
        my ($self) = @_;
 | 
						|
        my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
 | 
						|
        if (exists $self->{flags}{-keep}) {
 | 
						|
            $pat =~ s/\Q(?k:/(/g;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            $pat =~ s/\Q(?k:/(?:/g;
 | 
						|
        }
 | 
						|
        if (exists $self->{flags}{-i})   { $pat = "(?i)$pat" }
 | 
						|
        return $pat;
 | 
						|
    };
 | 
						|
 | 
						|
sub _clone_with {
 | 
						|
    my ($self, $args, $flags) = @_;
 | 
						|
    bless { %$self, args=>$args, flags=>$flags }, ref $self;
 | 
						|
}
 | 
						|
 | 
						|
1;
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
Regexp::Common - Provide commonly requested regular expressions
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
 # STANDARD USAGE
 | 
						|
 | 
						|
 use Regexp::Common;
 | 
						|
 | 
						|
 while (<>) {
 | 
						|
     /$RE{num}{real}/               and print q{a number};
 | 
						|
     /$RE{quoted}/                  and print q{a ['"`] quoted string};
 | 
						|
    m[$RE{delimited}{-delim=>'/'}]  and print q{a /.../ sequence};
 | 
						|
     /$RE{balanced}{-parens=>'()'}/ and print q{balanced parentheses};
 | 
						|
     /$RE{profanity}/               and print q{a #*@%-ing word};
 | 
						|
 }
 | 
						|
 | 
						|
 | 
						|
 # SUBROUTINE-BASED INTERFACE
 | 
						|
 | 
						|
 use Regexp::Common 'RE_ALL';
 | 
						|
 | 
						|
 while (<>) {
 | 
						|
     $_ =~ RE_num_real()              and print q{a number};
 | 
						|
     $_ =~ RE_quoted()                and print q{a ['"`] quoted string};
 | 
						|
     $_ =~ RE_delimited(-delim=>'/')  and print q{a /.../ sequence};
 | 
						|
     $_ =~ RE_balanced(-parens=>'()'} and print q{balanced parentheses};
 | 
						|
     $_ =~ RE_profanity()             and print q{a #*@%-ing word};
 | 
						|
 }
 | 
						|
 | 
						|
 | 
						|
 # IN-LINE MATCHING...
 | 
						|
 | 
						|
 if ( $RE{num}{int}->matches($text) ) {...}
 | 
						|
 | 
						|
 | 
						|
 # ...AND SUBSTITUTION
 | 
						|
 | 
						|
 my $cropped = $RE{ws}{crop}->subs($uncropped);
 | 
						|
 | 
						|
 | 
						|
 # ROLL-YOUR-OWN PATTERNS
 | 
						|
 | 
						|
 use Regexp::Common 'pattern';
 | 
						|
 | 
						|
 pattern name   => ['name', 'mine'],
 | 
						|
         create => '(?i:J[.]?\s+A[.]?\s+Perl-Hacker)',
 | 
						|
         ;
 | 
						|
 | 
						|
 my $name_matcher = $RE{name}{mine};
 | 
						|
 | 
						|
 pattern name    => [ 'lineof', '-char=_' ],
 | 
						|
         create  => sub {
 | 
						|
                        my $flags = shift;
 | 
						|
                        my $char = quotemeta $flags->{-char};
 | 
						|
                        return '(?:^$char+$)';
 | 
						|
                    },
 | 
						|
         match   => sub {
 | 
						|
                        my ($self, $str) = @_;
 | 
						|
                        return $str !~ /[^$self->{flags}{-char}]/;
 | 
						|
                    },
 | 
						|
         subs   => sub {
 | 
						|
                        my ($self, $str, $replacement) = @_;
 | 
						|
                        $_[1] =~ s/^$self->{flags}{-char}+$//g;
 | 
						|
                   },
 | 
						|
         ;
 | 
						|
 | 
						|
 my $asterisks = $RE{lineof}{-char=>'*'};
 | 
						|
 | 
						|
 # DECIDING WHICH PATTERNS TO LOAD.
 | 
						|
 | 
						|
 use Regexp::Common qw /comment number/;  # Comment and number patterns.
 | 
						|
 use Regexp::Common qw /no_defaults/;     # Don't load any patterns.
 | 
						|
 use Regexp::Common qw /!delimited/;      # All, but delimited patterns.
 | 
						|
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
By default, this module exports a single hash (C<%RE>) that stores or generates
 | 
						|
commonly needed regular expressions (see L<"List of available patterns">).
 | 
						|
 | 
						|
There is an alternative, subroutine-based syntax described in
 | 
						|
L<"Subroutine-based interface">.
 | 
						|
 | 
						|
 | 
						|
=head2 General syntax for requesting patterns
 | 
						|
 | 
						|
To access a particular pattern, C<%RE> is treated as a hierarchical hash of
 | 
						|
hashes (of hashes...), with each successive key being an identifier. For
 | 
						|
example, to access the pattern that matches real numbers, you
 | 
						|
specify:
 | 
						|
 | 
						|
        $RE{num}{real}
 | 
						|
 | 
						|
and to access the pattern that matches integers:
 | 
						|
 | 
						|
        $RE{num}{int}
 | 
						|
 | 
						|
Deeper layers of the hash are used to specify I<flags>: arguments that
 | 
						|
modify the resulting pattern in some way. The keys used to access these
 | 
						|
layers are prefixed with a minus sign and may have a value; if a value
 | 
						|
is given, it's done by using a multidimensional key.
 | 
						|
For example, to access the pattern that
 | 
						|
matches base-2 real numbers with embedded commas separating
 | 
						|
groups of three digits (e.g. 10,101,110.110101101):
 | 
						|
 | 
						|
        $RE{num}{real}{-base => 2}{-sep => ','}{-group => 3}
 | 
						|
 | 
						|
Through the magic of Perl, these flag layers may be specified in any order
 | 
						|
(and even interspersed through the identifier keys!)
 | 
						|
so you could get the same pattern with:
 | 
						|
 | 
						|
        $RE{num}{real}{-sep => ','}{-group => 3}{-base => 2}
 | 
						|
 | 
						|
or:
 | 
						|
 | 
						|
        $RE{num}{-base => 2}{real}{-group => 3}{-sep => ','}
 | 
						|
 | 
						|
or even:
 | 
						|
 | 
						|
        $RE{-base => 2}{-group => 3}{-sep => ','}{num}{real}
 | 
						|
 | 
						|
etc.
 | 
						|
 | 
						|
Note, however, that the relative order of amongst the identifier keys
 | 
						|
I<is> significant. That is:
 | 
						|
 | 
						|
        $RE{list}{set}
 | 
						|
 | 
						|
would not be the same as:
 | 
						|
 | 
						|
        $RE{set}{list}
 | 
						|
 | 
						|
=head2 Flag syntax
 | 
						|
 | 
						|
In versions prior to 2.113, flags could also be written as
 | 
						|
C<{"-flag=value"}>. This no longer works, although C<{"-flag$;value"}>
 | 
						|
still does. However, C<< {-flag => 'value'} >> is the preferred syntax.
 | 
						|
 | 
						|
=head2 Universal flags
 | 
						|
 | 
						|
Normally, flags are specific to a single pattern.
 | 
						|
However, there is two flags that all patterns may specify.
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<-keep>
 | 
						|
 | 
						|
By default, the patterns provided by C<%RE> contain no capturing
 | 
						|
parentheses. However, if the C<-keep> flag is specified (it requires
 | 
						|
no value) then any significant substrings that the pattern matches
 | 
						|
are captured. For example:
 | 
						|
 | 
						|
        if ($str =~ $RE{num}{real}{-keep}) {
 | 
						|
                $number   = $1;
 | 
						|
                $whole    = $3;
 | 
						|
                $decimals = $5;
 | 
						|
        }
 | 
						|
 | 
						|
Special care is needed if a "kept" pattern is interpolated into a
 | 
						|
larger regular expression, as the presence of other capturing
 | 
						|
parentheses is likely to change the "number variables" into which significant
 | 
						|
substrings are saved.
 | 
						|
 | 
						|
See also L<"Adding new regular expressions">, which describes how to create
 | 
						|
new patterns with "optional" capturing brackets that respond to C<-keep>.
 | 
						|
 | 
						|
=item C<-i>
 | 
						|
 | 
						|
Some patterns or subpatterns only match lowercase or uppercase letters.
 | 
						|
If one wants the do case insensitive matching, one option is to use
 | 
						|
the C</i> regexp modifier, or the special sequence C<(?i)>. But if the
 | 
						|
functional interface is used, one does not have this option. The
 | 
						|
C<-i> switch solves this problem; by using it, the pattern will do
 | 
						|
case insensitive matching.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head2 OO interface and inline matching/substitution
 | 
						|
 | 
						|
The patterns returned from C<%RE> are objects, so rather than writing:
 | 
						|
 | 
						|
        if ($str =~ /$RE{some}{pattern}/ ) {...}
 | 
						|
 | 
						|
you can write:
 | 
						|
 | 
						|
        if ( $RE{some}{pattern}->matches($str) ) {...}
 | 
						|
 | 
						|
For matching this would seem to have no great advantage apart from readability
 | 
						|
(but see below).
 | 
						|
 | 
						|
For substitutions, it has other significant benefits. Frequently you want to
 | 
						|
perform a substitution on a string without changing the original. Most people
 | 
						|
use this:
 | 
						|
 | 
						|
        $changed = $original;
 | 
						|
        $changed =~ s/$RE{some}{pattern}/$replacement/;
 | 
						|
 | 
						|
The more adept use:
 | 
						|
 | 
						|
        ($changed = $original) =~ s/$RE{some}{pattern}/$replacement/;
 | 
						|
 | 
						|
Regexp::Common allows you do write this:
 | 
						|
 | 
						|
        $changed = $RE{some}{pattern}->subs($original=>$replacement);
 | 
						|
 | 
						|
Apart from reducing precedence-angst, this approach has the added
 | 
						|
advantages that the substitution behaviour can be optimized from the
 | 
						|
regular expression, and the replacement string can be provided by
 | 
						|
default (see L<"Adding new regular expressions">).
 | 
						|
 | 
						|
For example, in the implementation of this substitution:
 | 
						|
 | 
						|
        $cropped = $RE{ws}{crop}->subs($uncropped);
 | 
						|
 | 
						|
the default empty string is provided automatically, and the substitution is
 | 
						|
optimized to use:
 | 
						|
 | 
						|
        $uncropped =~ s/^\s+//;
 | 
						|
        $uncropped =~ s/\s+$//;
 | 
						|
 | 
						|
rather than:
 | 
						|
 | 
						|
        $uncropped =~ s/^\s+|\s+$//g;
 | 
						|
 | 
						|
 | 
						|
=head2 Subroutine-based interface
 | 
						|
 | 
						|
The hash-based interface was chosen because it allows regexes to be
 | 
						|
effortlessly interpolated, and because it also allows them to be
 | 
						|
"curried". For example:
 | 
						|
 | 
						|
        my $num = $RE{num}{int};
 | 
						|
 | 
						|
        my $command    = $num->{-sep=>','}{-group=>3};
 | 
						|
        my $duodecimal = $num->{-base=>12};
 | 
						|
 | 
						|
 | 
						|
However, the use of tied hashes does make the access to Regexp::Common
 | 
						|
patterns slower than it might otherwise be. In contexts where impatience
 | 
						|
overrules laziness, Regexp::Common provides an additional
 | 
						|
subroutine-based interface.
 | 
						|
 | 
						|
For each (sub-)entry in the C<%RE> hash (C<$RE{key1}{key2}{etc}>), there
 | 
						|
is a corresponding exportable subroutine: C<RE_key1_key2_etc()>. The name of
 | 
						|
each subroutine is the underscore-separated concatenation of the I<non-flag>
 | 
						|
keys that locate the same pattern in C<%RE>. Flags are passed to the subroutine
 | 
						|
in its argument list. Thus:
 | 
						|
 | 
						|
        use Regexp::Common qw( RE_ws_crop RE_num_real RE_profanity );
 | 
						|
 | 
						|
        $str =~ RE_ws_crop() and die "Surrounded by whitespace";
 | 
						|
 | 
						|
        $str =~ RE_num_real(-base=>8, -sep=>" ") or next;
 | 
						|
 | 
						|
        $offensive = RE_profanity(-keep);
 | 
						|
        $str =~ s/$offensive/$bad{$1}++; "<expletive deleted>"/ge;
 | 
						|
 | 
						|
Note that, unlike the hash-based interface (which returns objects), these
 | 
						|
subroutines return ordinary C<qr>'d regular expressions. Hence they do not
 | 
						|
curry, nor do they provide the OO match and substitution inlining described
 | 
						|
in the previous section.
 | 
						|
 | 
						|
It is also possible to export subroutines for all available patterns like so:
 | 
						|
 | 
						|
        use Regexp::Common 'RE_ALL';
 | 
						|
 | 
						|
Or you can export all subroutines with a common prefix of keys like so:
 | 
						|
 | 
						|
        use Regexp::Common 'RE_num_ALL';
 | 
						|
 | 
						|
which will export C<RE_num_int> and C<RE_num_real> (and if you have
 | 
						|
create more patterns who have first key I<num>, those will be exported
 | 
						|
as well). In general, I<RE_key1_..._keyn_ALL> will export all subroutines
 | 
						|
whose pattern names have first keys I<key1> ... I<keyn>.
 | 
						|
 | 
						|
 | 
						|
=head2 Adding new regular expressions
 | 
						|
 | 
						|
You can add your own regular expressions to the C<%RE> hash at run-time,
 | 
						|
using the exportable C<pattern> subroutine. It expects a hash-like list of
 | 
						|
key/value pairs that specify the behaviour of the pattern. The various
 | 
						|
possible argument pairs are:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<name =E<gt> [ @list ]>
 | 
						|
 | 
						|
A required argument that specifies the name of the pattern, and any
 | 
						|
flags it may take, via a reference to a list of strings. For example:
 | 
						|
 | 
						|
         pattern name => [qw( line of -char )],
 | 
						|
                 # other args here
 | 
						|
                 ;
 | 
						|
 | 
						|
This specifies an entry C<$RE{line}{of}>, which may take a C<-char> flag.
 | 
						|
 | 
						|
Flags may also be specified with a default value, which is then used whenever
 | 
						|
the flag is specified without an explicit value (but not when the flag is
 | 
						|
omitted). For example:
 | 
						|
 | 
						|
         pattern name => [qw( line of -char=_ )],
 | 
						|
                 # default char is '_'
 | 
						|
                 # other args here
 | 
						|
                 ;
 | 
						|
 | 
						|
 | 
						|
=item C<create =E<gt> $sub_ref_or_string>
 | 
						|
 | 
						|
A required argument that specifies either a string that is to be returned
 | 
						|
as the pattern:
 | 
						|
 | 
						|
        pattern name    => [qw( line of underscores )],
 | 
						|
                create  => q/(?:^_+$)/
 | 
						|
                ;
 | 
						|
 | 
						|
or a reference to a subroutine that will be called to create the pattern:
 | 
						|
 | 
						|
        pattern name    => [qw( line of -char=_ )],
 | 
						|
                create  => sub {
 | 
						|
                                my ($self, $flags) = @_;
 | 
						|
                                my $char = quotemeta $flags->{-char};
 | 
						|
                                return '(?:^$char+$)';
 | 
						|
                            },
 | 
						|
                ;
 | 
						|
 | 
						|
If the subroutine version is used, the subroutine will be called with
 | 
						|
three arguments: a reference to the pattern object itself, a reference
 | 
						|
to a hash containing the flags and their values,
 | 
						|
and a reference to an array containing the non-flag keys.
 | 
						|
 | 
						|
Whatever the subroutine returns is stringified as the pattern.
 | 
						|
 | 
						|
No matter how the pattern is created, it is immediately postprocessed to
 | 
						|
include or exclude capturing parentheses (according to the value of the
 | 
						|
C<-keep> flag). To specify such "optional" capturing parentheses within
 | 
						|
the regular expression associated with C<create>, use the notation
 | 
						|
C<(?k:...)>. Any parentheses of this type will be converted to C<(...)>
 | 
						|
when the C<-keep> flag is specified, or C<(?:...)> when it is not.
 | 
						|
It is a Regexp::Common convention that the outermost capturing parentheses
 | 
						|
always capture the entire pattern, but this is not enforced.
 | 
						|
 | 
						|
 | 
						|
=item C<match =E<gt> $sub_ref>
 | 
						|
 | 
						|
An optional argument that specifies a subroutine that is to be called when
 | 
						|
the C<$RE{...}-E<gt>matches(...)> method of this pattern is invoked.
 | 
						|
 | 
						|
The subroutine should expect two arguments: a reference to the pattern object
 | 
						|
itself, and the string to be matched against.
 | 
						|
 | 
						|
It should return the same types of values as a C<m/.../> does.
 | 
						|
 | 
						|
     pattern name    => [qw( line of -char )],
 | 
						|
             create  => sub {...},
 | 
						|
             match   => sub {
 | 
						|
                             my ($self, $str) = @_;
 | 
						|
                             $str !~ /[^$self->{flags}{-char}]/;
 | 
						|
                        },
 | 
						|
             ;
 | 
						|
 | 
						|
 | 
						|
=item C<subs =E<gt> $sub_ref>
 | 
						|
 | 
						|
An optional argument that specifies a subroutine that is to be called when
 | 
						|
the C<$RE{...}-E<gt>subs(...)> method of this pattern is invoked.
 | 
						|
 | 
						|
The subroutine should expect three arguments: a reference to the pattern object
 | 
						|
itself, the string to be changed, and the value to be substituted into it.
 | 
						|
The third argument may be C<undef>, indicating the default substitution is
 | 
						|
required.
 | 
						|
 | 
						|
The subroutine should return the same types of values as an C<s/.../.../> does.
 | 
						|
 | 
						|
For example:
 | 
						|
 | 
						|
     pattern name    => [ 'lineof', '-char=_' ],
 | 
						|
             create  => sub {...},
 | 
						|
             subs    => sub {
 | 
						|
                          my ($self, $str, $ignore_replacement) = @_;
 | 
						|
                          $_[1] =~ s/^$self->{flags}{-char}+$//g;
 | 
						|
                        },
 | 
						|
             ;
 | 
						|
 | 
						|
Note that such a subroutine will almost always need to modify C<$_[1]> directly.
 | 
						|
 | 
						|
 | 
						|
=item C<version =E<gt> $minimum_perl_version>
 | 
						|
 | 
						|
If this argument is given, it specifies the minimum version of perl required
 | 
						|
to use the new pattern. Attempts to use the pattern with earlier versions of
 | 
						|
perl will generate a fatal diagnostic.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head2 Loading specific sets of patterns.
 | 
						|
 | 
						|
By default, all the sets of patterns listed below are made available.
 | 
						|
However, it is possible to indicate which sets of patterns should
 | 
						|
be made available - the wanted sets should be given as arguments to
 | 
						|
C<use>. Alternatively, it is also possible to indicate which sets of
 | 
						|
patterns should not be made available - those sets will be given as
 | 
						|
argument to the C<use> statement, but are preceded with an exclaimation
 | 
						|
mark. The argument I<no_defaults> indicates none of the default patterns
 | 
						|
should be made available. This is useful for instance if all you want
 | 
						|
is the C<pattern()> subroutine.
 | 
						|
 | 
						|
Examples:
 | 
						|
 | 
						|
 use Regexp::Common qw /comment number/;  # Comment and number patterns.
 | 
						|
 use Regexp::Common qw /no_defaults/;     # Don't load any patterns.
 | 
						|
 use Regexp::Common qw /!delimited/;      # All, but delimited patterns.
 | 
						|
 | 
						|
It's also possible to load your own set of patterns. If you have a
 | 
						|
module C<Regexp::Common::my_patterns> that makes patterns available,
 | 
						|
you can have it made available with
 | 
						|
 | 
						|
 use Regexp::Common qw /my_patterns/;
 | 
						|
 | 
						|
Note that the default patterns will still be made available - only if
 | 
						|
you use I<no_defaults>, or mention one of the default sets explicitly,
 | 
						|
the non mentioned defaults aren't made available.
 | 
						|
 | 
						|
=head2 List of available patterns
 | 
						|
 | 
						|
The patterns listed below are currently available. Each set of patterns
 | 
						|
has its own manual page describing the details. For each pattern set
 | 
						|
named I<name>, the manual page I<Regexp::Common::name> describes the
 | 
						|
details.
 | 
						|
 | 
						|
Currently available are:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item Regexp::Common::balanced
 | 
						|
 | 
						|
Provides regexes for strings with balanced parenthesized delimiters.
 | 
						|
 | 
						|
=item Regexp::Common::comment
 | 
						|
 | 
						|
Provides regexes for comments of various languages (43 languages
 | 
						|
currently).
 | 
						|
 | 
						|
=item Regexp::Common::delimited
 | 
						|
 | 
						|
Provides regexes for delimited strings.
 | 
						|
 | 
						|
=item Regexp::Common::lingua
 | 
						|
 | 
						|
Provides regexes for palindromes.
 | 
						|
 | 
						|
=item Regexp::Common::list
 | 
						|
 | 
						|
Provides regexes for lists.
 | 
						|
 | 
						|
=item Regexp::Common::net
 | 
						|
 | 
						|
Provides regexes for IPv4, IPv6, and MAC addresses.
 | 
						|
 | 
						|
=item Regexp::Common::number
 | 
						|
 | 
						|
Provides regexes for numbers (integers and reals).
 | 
						|
 | 
						|
=item Regexp::Common::profanity
 | 
						|
 | 
						|
Provides regexes for profanity.
 | 
						|
 | 
						|
=item Regexp::Common::whitespace
 | 
						|
 | 
						|
Provides regexes for leading and trailing whitespace.
 | 
						|
 | 
						|
=item Regexp::Common::zip
 | 
						|
 | 
						|
Provides regexes for zip codes.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head2 Forthcoming patterns and features
 | 
						|
 | 
						|
Future releases of the module will also provide patterns for the following:
 | 
						|
 | 
						|
        * email addresses
 | 
						|
        * HTML/XML tags
 | 
						|
        * more numerical matchers,
 | 
						|
        * mail headers (including multiline ones),
 | 
						|
        * more URLS
 | 
						|
        * telephone numbers of various countries
 | 
						|
        * currency (universal 3 letter format, Latin-1, currency names)
 | 
						|
        * dates
 | 
						|
        * binary formats (e.g. UUencoded, MIMEd)
 | 
						|
 | 
						|
If you have other patterns or pattern generators that you think would be
 | 
						|
generally useful, please send them to the maintainer -- preferably as source
 | 
						|
code using the C<pattern> subroutine. Submissions that include a set of
 | 
						|
tests will be especially welcome.
 | 
						|
 | 
						|
 | 
						|
=head1 DIAGNOSTICS
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<Can't export unknown subroutine %s>
 | 
						|
 | 
						|
The subroutine-based interface didn't recognize the requested subroutine.
 | 
						|
Often caused by a spelling mistake or an incompletely specified name.
 | 
						|
 | 
						|
 | 
						|
=item C<Can't create unknown regex: $RE{...}>
 | 
						|
 | 
						|
Regexp::Common doesn't have a generator for the requested pattern.
 | 
						|
Often indicates a misspelt or missing parameter.
 | 
						|
 | 
						|
=item
 | 
						|
C<Perl %f does not support the pattern $RE{...}.
 | 
						|
You need Perl %f or later>
 | 
						|
 | 
						|
The requested pattern requires advanced regex features (e.g. recursion)
 | 
						|
that not available in your version of Perl. Time to upgrade.
 | 
						|
 | 
						|
=item C<< pattern() requires argument: name => [ @list ] >>
 | 
						|
 | 
						|
Every user-defined pattern specification must have a name.
 | 
						|
 | 
						|
=item C<< pattern() requires argument: create => $sub_ref_or_string >>
 | 
						|
 | 
						|
Every user-defined pattern specification must provide a pattern creation
 | 
						|
mechanism: either a pattern string or a reference to a subroutine that
 | 
						|
returns the pattern string.
 | 
						|
 | 
						|
=item C<Base must be between 1 and 36>
 | 
						|
 | 
						|
The C<< $RE{num}{real}{-base=>'I<N>'} >> pattern uses the characters [0-9A-Z]
 | 
						|
to represent the digits of various bases. Hence it only produces
 | 
						|
regular expressions for bases up to hexatricensimal.
 | 
						|
 | 
						|
=item C<Must specify delimiter in $RE{delimited}>
 | 
						|
 | 
						|
The pattern has no default delimiter.
 | 
						|
You need to write: C<< $RE{delimited}{-delim=>I<X>'} >> for some character I<X>
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 ACKNOWLEDGEMENTS
 | 
						|
 | 
						|
Deepest thanks to the many people who have encouraged and contributed to this
 | 
						|
project, especially: Elijah, Jarkko, Tom, Nat, Ed, and Vivek.
 | 
						|
 | 
						|
Further thanks go to: Alexandr Ciornii, Blair Zajac, Bob Stockdale,
 | 
						|
Charles Thomas, Chris Vertonghen, the CPAN Testers, David Hand,
 | 
						|
Fany, Geoffrey Leach, Hermann-Marcus Behrens, Jerome Quelin, Jim Cromie,
 | 
						|
Lars Wilke, Linda Julien, Mike Arms, Mike Castle, Mikko, Murat Uenalan,
 | 
						|
RafaE<235>l Garcia-Suarez, Ron Savage, Sam Vilain, Slaven Rezic, Smylers,
 | 
						|
Tim Maher, and all the others I've forgotten.
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Damian Conway (damian@conway.org)
 | 
						|
 | 
						|
=head1 MAINTENANCE
 | 
						|
 | 
						|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | 
						|
 | 
						|
=head1 BUGS AND IRRITATIONS
 | 
						|
 | 
						|
Bound to be plenty.
 | 
						|
 | 
						|
For a start, there are many common regexes missing.
 | 
						|
Send them in to I<regexp-common@abigail.be>.
 | 
						|
 | 
						|
There are some POD issues when installing this module using a pre-5.6.0 perl;
 | 
						|
some manual pages may not install, or may not install correctly using a perl
 | 
						|
that is that old. You might consider upgrading your perl.
 | 
						|
 | 
						|
=head1 NOT A BUG
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item *
 | 
						|
 | 
						|
The various patterns are not anchored. That is, a pattern like
 | 
						|
C<< $RE {num} {int} >> will match against "abc4def", because a
 | 
						|
substring of the subject matches. This is by design, and not a
 | 
						|
bug. If you want the pattern to be anchored, use something like:
 | 
						|
 | 
						|
 my $integer = $RE {num} {int};
 | 
						|
 $subj =~ /^$integer$/ and print "Matches!\n";
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 LICENSE and COPYRIGHT
 | 
						|
 | 
						|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | 
						|
 | 
						|
This module is free software, and maybe used under any of the following
 | 
						|
licenses:
 | 
						|
 | 
						|
 1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | 
						|
 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | 
						|
 3) The BSD License.               See the file COPYRIGHT.BSD.
 | 
						|
 4) The MIT License.               See the file COPYRIGHT.MIT.
 | 
						|
EOCommon
 | 
						|
# 2}}}
 | 
						|
$Regexp_Common_Contents{'Common/comment'} = <<'EOC';   # {{{2
 | 
						|
package Regexp::Common::comment;
 | 
						|
 | 
						|
use 5.10.0;
 | 
						|
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
no  warnings 'syntax';
 | 
						|
 | 
						|
use Regexp::Common qw /pattern clean no_defaults/;
 | 
						|
 | 
						|
our $VERSION = '2017060201';
 | 
						|
 | 
						|
my @generic = (
 | 
						|
    {languages => [qw /ABC Forth/],
 | 
						|
     to_eol    => ['\\\\']},   # This is for just a *single* backslash.
 | 
						|
 | 
						|
    {languages => [qw /Ada Alan Eiffel lua/],
 | 
						|
     to_eol    => ['--']},
 | 
						|
 | 
						|
    {languages => [qw /Advisor/],
 | 
						|
     to_eol    => ['#|//']},
 | 
						|
 | 
						|
    {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
 | 
						|
                       SMITH zonefile/],
 | 
						|
     to_eol    => [';']},
 | 
						|
 | 
						|
    {languages => ['Algol 60'],
 | 
						|
     from_to   => [[qw /comment ;/]]},
 | 
						|
 | 
						|
    {languages => [qw {ALPACA B C C-- LPC PL/I}],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw /awk fvwm2 Icon m4 mutt Perl Python QML
 | 
						|
                       R Ruby shell Tcl/],
 | 
						|
     to_eol    => ['#']},
 | 
						|
 | 
						|
    {languages => [[BASIC => 'mvEnterprise']],
 | 
						|
     to_eol    => ['[*!]|REM']},
 | 
						|
 | 
						|
    {languages => [qw /Befunge-98 Funge-98 Shelta/],
 | 
						|
     id        => [';']},
 | 
						|
 | 
						|
    {languages => ['beta-Juliet', 'Crystal Report', 'Portia', 'Ubercode'],
 | 
						|
     to_eol    => ['//']},
 | 
						|
 | 
						|
    {languages => ['BML'],
 | 
						|
     from_to   => [['<?_c', '_c?>']],
 | 
						|
    },
 | 
						|
 | 
						|
    {languages => [qw /C++/, 'C#', 'X++', qw /Cg ECMAScript FPL Java JavaScript/],
 | 
						|
     to_eol    => ['//'],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw /CLU LaTeX slrn TeX/],
 | 
						|
     to_eol    => ['%']},
 | 
						|
 | 
						|
    {languages => [qw /False/],
 | 
						|
     from_to   => [[qw !{ }!]]},
 | 
						|
 | 
						|
    {languages => [qw /Fortran/],
 | 
						|
     to_eol    => ['!']},
 | 
						|
 | 
						|
    {languages => [qw /Haifu/],
 | 
						|
     id        => [',']},
 | 
						|
 | 
						|
    {languages => [qw /ILLGOL/],
 | 
						|
     to_eol    => ['NB']},
 | 
						|
 | 
						|
    {languages => [qw /INTERCAL/],
 | 
						|
     to_eol    => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
 | 
						|
 | 
						|
    {languages => [qw /J/],
 | 
						|
     to_eol    => ['NB[.]']},
 | 
						|
 | 
						|
    {languages => [qw /JavaDoc/],
 | 
						|
     from_to   => [[qw {/** */}]]},
 | 
						|
 | 
						|
    {languages => [qw /Nickle/],
 | 
						|
     to_eol    => ['#'],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw /Oberon/],
 | 
						|
     from_to   => [[qw /(* *)/]]},
 | 
						|
 | 
						|
    {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
 | 
						|
     to_eol    => ['//'],
 | 
						|
     from_to   => [[qw !{ }!], [qw !(* *)!]]},
 | 
						|
 | 
						|
    {languages => [[qw /Pascal Workshop/]],
 | 
						|
     id        => [qw /"/],
 | 
						|
     from_to   => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
 | 
						|
 | 
						|
    {languages => [qw /PEARL/],
 | 
						|
     to_eol    => ['!'],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw /PHP/],
 | 
						|
     to_eol    => ['#', '//'],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw !PL/B!],
 | 
						|
     to_eol    => ['[.;]']},
 | 
						|
 | 
						|
    {languages => [qw !PL/SQL!],
 | 
						|
     to_eol    => ['--'],
 | 
						|
     from_to   => [[qw {/* */}]]},
 | 
						|
 | 
						|
    {languages => [qw /Q-BAL/],
 | 
						|
     to_eol    => ['`']},
 | 
						|
 | 
						|
    {languages => [qw /Smalltalk/],
 | 
						|
     id        => ['"']},
 | 
						|
 | 
						|
    {languages => [qw /SQL/],
 | 
						|
     to_eol    => ['-{2,}']},
 | 
						|
 | 
						|
    {languages => [qw /troff/],
 | 
						|
     to_eol    => ['\\\"']},
 | 
						|
 | 
						|
    {languages => [qw /vi/],
 | 
						|
     to_eol    => ['"']},
 | 
						|
 | 
						|
    {languages => [qw /*W/],
 | 
						|
     from_to   => [[qw {|| !!}]]},
 | 
						|
 | 
						|
    {languages => [qw /ZZT-OOP/],
 | 
						|
     to_eol    => ["'"]},
 | 
						|
);
 | 
						|
 | 
						|
my @plain_or_nested = (
 | 
						|
   [Caml         =>  undef,       "(*"  => "*)"],
 | 
						|
   [Dylan        =>  "//",        "/*"  => "*/"],
 | 
						|
   [Haskell      =>  "-{2,}",     "{-"  => "-}"],
 | 
						|
   [Hugo         =>  "!(?!\\\\)", "!\\" => "\\!"],
 | 
						|
   [SLIDE        =>  "#",         "(*"  => "*)"],
 | 
						|
  ['Modula-2'    =>  undef,       "(*"  => "*)"],
 | 
						|
  ['Modula-3'    =>  undef,       "(*"  => "*)"],
 | 
						|
);
 | 
						|
 | 
						|
#
 | 
						|
# Helper subs.
 | 
						|
#
 | 
						|
 | 
						|
sub combine      {
 | 
						|
    local $_ = join "|", @_;
 | 
						|
    if (@_ > 1) {
 | 
						|
        s/\(\?k:/(?:/g;
 | 
						|
        $_ = "(?k:$_)";
 | 
						|
    }
 | 
						|
    $_
 | 
						|
}
 | 
						|
 | 
						|
sub to_eol  ($)  {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
 | 
						|
sub id      ($)  {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"}  # One char only!
 | 
						|
sub from_to      {
 | 
						|
    my ($begin, $end) = @_;
 | 
						|
 | 
						|
    my $qb  = quotemeta $begin;
 | 
						|
    my $qe  = quotemeta $end;
 | 
						|
    my $fe  = quotemeta substr $end   => 0, 1;
 | 
						|
    my $te  = quotemeta substr $end   => 1;
 | 
						|
 | 
						|
    "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
my $count = 0;
 | 
						|
sub nested {
 | 
						|
    my ($begin, $end) = @_;
 | 
						|
 | 
						|
    $count ++;
 | 
						|
    my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
 | 
						|
 | 
						|
    my $qb  = quotemeta $begin;
 | 
						|
    my $qe  = quotemeta $end;
 | 
						|
    my $fb  = quotemeta substr $begin => 0, 1;
 | 
						|
    my $fe  = quotemeta substr $end   => 0, 1;
 | 
						|
 | 
						|
    my $tb  = quotemeta substr $begin => 1;
 | 
						|
    my $te  = quotemeta substr $end   => 1;
 | 
						|
 | 
						|
    use re 'eval';
 | 
						|
 | 
						|
    my $re;
 | 
						|
    if ($fb eq $fe) {
 | 
						|
        $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        local $"      =  "|";
 | 
						|
        my   @clauses =  "(?>[^$fb$fe]+)";
 | 
						|
        push @clauses => "$fb(?!$tb)" if length $tb;
 | 
						|
        push @clauses => "$fe(?!$te)" if length $te;
 | 
						|
        push @clauses =>  $r;
 | 
						|
        $re           =   qr /(?:$qb(?:@clauses)*$qe)/;
 | 
						|
    }
 | 
						|
 | 
						|
    $Regexp::Common::comment [$count] = qr/$re/;
 | 
						|
}
 | 
						|
 | 
						|
#
 | 
						|
# Process data.
 | 
						|
#
 | 
						|
 | 
						|
foreach my $info (@plain_or_nested) {
 | 
						|
    my ($language, $mark, $begin, $end) = @$info;
 | 
						|
    pattern name    => [comment => $language],
 | 
						|
            create  =>
 | 
						|
                sub {my $re     = nested $begin => $end;
 | 
						|
                     my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
 | 
						|
                     exists $_ [1] -> {-keep} ? qr /($prefix$re)/
 | 
						|
                                              : qr  /$prefix$re/
 | 
						|
                },
 | 
						|
            ;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
foreach my $group (@generic) {
 | 
						|
    my $pattern = combine +(map {to_eol   $_} @{$group -> {to_eol}}),
 | 
						|
                           (map {from_to @$_} @{$group -> {from_to}}),
 | 
						|
                           (map {id       $_} @{$group -> {id}}),
 | 
						|
                  ;
 | 
						|
    foreach my $language  (@{$group -> {languages}}) {
 | 
						|
        pattern name    => [comment => ref $language ? @$language : $language],
 | 
						|
                create  => $pattern,
 | 
						|
                ;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
#
 | 
						|
# Other languages.
 | 
						|
#
 | 
						|
 | 
						|
# http://www.pascal-central.com/docs/iso10206.txt
 | 
						|
pattern name    => [qw /comment Pascal/],
 | 
						|
        create  => '(?k:' . '(?k:[{]|[(][*])'
 | 
						|
                          . '(?k:[^}*]*(?:[*](?![)])[^}*]*)*)'
 | 
						|
                          . '(?k:[}]|[*][)])'
 | 
						|
                          . ')'
 | 
						|
        ;
 | 
						|
 | 
						|
# http://www.templetons.com/brad/alice/language/
 | 
						|
pattern name    =>  [qw /comment Pascal Alice/],
 | 
						|
        create  =>  '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
 | 
						|
        ;
 | 
						|
 | 
						|
 | 
						|
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
 | 
						|
pattern name    => [qw (comment), 'Algol 68'],
 | 
						|
        create  => q {(?k:(?:#[^#]*#)|}                           .
 | 
						|
                   q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
 | 
						|
                   q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
 | 
						|
        ;
 | 
						|
 | 
						|
 | 
						|
# See rules 91 and 92 of ISO 8879 (SGML).
 | 
						|
# Charles F. Goldfarb: "The SGML Handbook".
 | 
						|
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
 | 
						|
# Ch. 10.3, pp 390.
 | 
						|
pattern name    => [qw (comment HTML)],
 | 
						|
        create  => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
 | 
						|
        ;
 | 
						|
 | 
						|
 | 
						|
pattern name    => [qw /comment SQL MySQL/],
 | 
						|
        create  => q {(?k:(?:#|-- )[^\n]*\n|} .
 | 
						|
                   q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
 | 
						|
        ;
 | 
						|
 | 
						|
# Anything that isn't <>[]+-.,
 | 
						|
# http://home.wxs.nl/~faase009/Ha_BF.html
 | 
						|
pattern name    => [qw /comment Brainfuck/],
 | 
						|
        create  => '(?k:[^<>\[\]+\-.,]+)'
 | 
						|
        ;
 | 
						|
 | 
						|
# Squeak is a variant of Smalltalk-80.
 | 
						|
# http://www.squeak.
 | 
						|
# http://mucow.com/squeak-qref.html
 | 
						|
pattern name    => [qw /comment Squeak/],
 | 
						|
        create  => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
 | 
						|
        ;
 | 
						|
 | 
						|
#
 | 
						|
# Scores of less than 5 or above 17....
 | 
						|
# http://www.cliff.biffle.org/esoterica/beatnik.html
 | 
						|
@Regexp::Common::comment::scores = (1,  3,  3,  2,  1,  4,  2,  4,  1,  8,
 | 
						|
                                    5,  1,  3,  1,  1,  3, 10,  1,  1,  1,
 | 
						|
                                    1,  4,  4,  8,  4, 10);
 | 
						|
{
 | 
						|
my ($s, $x);
 | 
						|
pattern name    =>  [qw /comment Beatnik/],
 | 
						|
        create  =>  sub {
 | 
						|
            use re 'eval';
 | 
						|
            my $re = qr {\b([A-Za-z]+)\b
 | 
						|
                         (?(?{($s, $x) = (0, lc $^N);
 | 
						|
                              $s += $Regexp::Common::comment::scores
 | 
						|
                                    [ord (chop $x) - ord ('a')] while length $x;
 | 
						|
                              $s  >= 5 && $s < 18})XXX|)}x;
 | 
						|
            $re;
 | 
						|
        },
 | 
						|
        ;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
 | 
						|
#  (Goto table of contents/3.3 Source Form)
 | 
						|
# Fortran, in fixed format. Comments start with a C, c or * in the first
 | 
						|
# column, or a ! anywhere, but the sixth column. Then end with a newline.
 | 
						|
pattern name    =>  [qw /comment Fortran fixed/],
 | 
						|
        create  =>  '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
 | 
						|
        ;
 | 
						|
 | 
						|
 | 
						|
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
 | 
						|
# Traditionally, comments in COBOL were indicated with an asterisk in
 | 
						|
# the seventh column. Modern compilers may be more lenient.
 | 
						|
pattern name    =>  [qw /comment COBOL/],
 | 
						|
        create  =>  '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
 | 
						|
        ;
 | 
						|
 | 
						|
1;
 | 
						|
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
Regexp::Common::comment -- provide regexes for comments.
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
    use Regexp::Common qw /comment/;
 | 
						|
 | 
						|
    while (<>) {
 | 
						|
        /$RE{comment}{C}/       and  print "Contains a C comment\n";
 | 
						|
        /$RE{comment}{C++}/     and  print "Contains a C++ comment\n";
 | 
						|
        /$RE{comment}{PHP}/     and  print "Contains a PHP comment\n";
 | 
						|
        /$RE{comment}{Java}/    and  print "Contains a Java comment\n";
 | 
						|
        /$RE{comment}{Perl}/    and  print "Contains a Perl comment\n";
 | 
						|
        /$RE{comment}{awk}/     and  print "Contains an awk comment\n";
 | 
						|
        /$RE{comment}{HTML}/    and  print "Contains an HTML comment\n";
 | 
						|
    }
 | 
						|
 | 
						|
    use Regexp::Common qw /comment RE_comment_HTML/;
 | 
						|
 | 
						|
    while (<>) {
 | 
						|
        $_ =~ RE_comment_HTML() and  print "Contains an HTML comment\n";
 | 
						|
    }
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
Please consult the manual of L<Regexp::Common> for a general description
 | 
						|
of the works of this interface.
 | 
						|
 | 
						|
Do not use this module directly, but load it via I<Regexp::Common>.
 | 
						|
 | 
						|
This modules gives you regular expressions for comments in various
 | 
						|
languages.
 | 
						|
 | 
						|
=head2 THE LANGUAGES
 | 
						|
 | 
						|
Below, the comments of each of the languages are described.
 | 
						|
The patterns are available as C<$RE{comment}{I<LANG>}>, foreach
 | 
						|
language I<LANG>. Some languages have variants; it's described
 | 
						|
at the individual languages how to get the patterns for the variants.
 | 
						|
Unless mentioned otherwise,
 | 
						|
C<{-keep}> sets C<$1>, C<$2>, C<$3> and C<$4> to the entire comment,
 | 
						|
the opening marker, the content of the comment, and the closing marker
 | 
						|
(for many languages, the latter is a newline) respectively.
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item ABC
 | 
						|
 | 
						|
Comments in I<ABC> start with a backslash (C<\>), and last till
 | 
						|
the end of the line.
 | 
						|
See L<http://homepages.cwi.nl/%7Esteven/abc/>.
 | 
						|
 | 
						|
=item Ada
 | 
						|
 | 
						|
Comments in I<Ada> start with C<-->, and last till the end of the line.
 | 
						|
 | 
						|
=item Advisor
 | 
						|
 | 
						|
I<Advisor> is a language used by the HP product I<glance>. Comments for
 | 
						|
this language start with either C<#> or C<//>, and last till the
 | 
						|
end of the line.
 | 
						|
 | 
						|
=item Advsys
 | 
						|
 | 
						|
Comments for the I<Advsys> language start with C<;> and last till
 | 
						|
the end of the line. See also L<http://www.wurb.com/if/devsys/12>.
 | 
						|
 | 
						|
=item Alan
 | 
						|
 | 
						|
I<Alan> comments start with C<-->, and last till the end of the line.
 | 
						|
See also L<http://w1.132.telia.com/~u13207378/alan/manual/alanTOC.html>.
 | 
						|
 | 
						|
=item Algol 60
 | 
						|
 | 
						|
Comments in the I<Algol 60> language start with the keyword C<comment>,
 | 
						|
and end with a C<;>. See L<http://www.masswerk.at/algol60/report.htm>.
 | 
						|
 | 
						|
=item Algol 68
 | 
						|
 | 
						|
In I<Algol 68>, comments are either delimited by C<#>, or by one of the
 | 
						|
keywords C<co> or C<comment>. The keywords should not be part of another
 | 
						|
word. See L<http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt>.
 | 
						|
With C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | 
						|
 | 
						|
=item ALPACA
 | 
						|
 | 
						|
The I<ALPACA> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
 | 
						|
=item awk
 | 
						|
 | 
						|
The I<awk> programming language uses comments that start with C<#>
 | 
						|
and end at the end of the line.
 | 
						|
 | 
						|
=item B
 | 
						|
 | 
						|
The I<B> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
 | 
						|
=item BASIC
 | 
						|
 | 
						|
There are various forms of BASIC around. Currently, we only support the
 | 
						|
variant supported by I<mvEnterprise>, whose pattern is available as
 | 
						|
C<$RE{comment}{BASIC}{mvEnterprise}>. Comments in this language start with a
 | 
						|
C<!>, a C<*> or the keyword C<REM>, and end till the end of the line. See
 | 
						|
L<http://www.rainingdata.com/products/beta/docs/mve/50/ReferenceManual/Basic.pdf>.
 | 
						|
 | 
						|
=item Beatnik
 | 
						|
 | 
						|
The esotoric language I<Beatnik> only uses words consisting of letters.
 | 
						|
Words are scored according to the rules of Scrabble. Words scoring less
 | 
						|
than 5 points, or 18 points or more are considered comments (although
 | 
						|
the compiler might mock at you if you score less than 5 points).
 | 
						|
Regardless whether C<{-keep}>, C<$1> will be set, and set to the
 | 
						|
entire comment. This pattern requires I<perl 5.8.0> or newer.
 | 
						|
 | 
						|
=item beta-Juliet
 | 
						|
 | 
						|
The I<beta-Juliet> programming language has comments that start with
 | 
						|
C<//> and that continue till the end of the line. See also
 | 
						|
L<http://www.catseye.mb.ca/esoteric/b-juliet/index.html>.
 | 
						|
 | 
						|
=item Befunge-98
 | 
						|
 | 
						|
The esotoric language I<Befunge-98> uses comments that start and end
 | 
						|
with a C<;>. See L<http://www.catseye.mb.ca/esoteric/befunge/98/spec98.html>.
 | 
						|
 | 
						|
=item BML
 | 
						|
 | 
						|
I<BML>, or I<Better Markup Language> is an HTML templating language that
 | 
						|
uses comments starting with C<< <?c_ >>, and ending with C<< c_?> >>.
 | 
						|
See L<http://www.livejournal.com/doc/server/bml.index.html>.
 | 
						|
 | 
						|
=item Brainfuck
 | 
						|
 | 
						|
The minimal language I<Brainfuck> uses only eight characters,
 | 
						|
C<E<lt>>, C<E<gt>>, C<[>, C<]>, C<+>, C<->, C<.> and C<,>.
 | 
						|
Any other characters are considered comments. With C<{-keep}>,
 | 
						|
C<$1> is set to the entire comment.
 | 
						|
 | 
						|
=item C
 | 
						|
 | 
						|
The I<C> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
 | 
						|
=item C--
 | 
						|
 | 
						|
The I<C--> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
See L<http://cs.uas.arizona.edu/classes/453/programs/C--Spec.html>.
 | 
						|
 | 
						|
=item C++
 | 
						|
 | 
						|
The I<C++> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment.
 | 
						|
 | 
						|
=item C#
 | 
						|
 | 
						|
The I<C#> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment.
 | 
						|
See L<http://msdn.microsoft.com/library/default.asp?url=/library/en-us/csspec/html/vclrfcsharpspec_C.asp>.
 | 
						|
 | 
						|
=item Caml
 | 
						|
 | 
						|
Comments in I<Caml> start with C<(*>, end with C<*)>, and can be nested.
 | 
						|
See L<http://www.cs.caltech.edu/courses/cs134/cs134b/book.pdf> and
 | 
						|
L<http://pauillac.inria.fr/caml/index-eng.html>.
 | 
						|
 | 
						|
=item Cg
 | 
						|
 | 
						|
The I<Cg> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment.
 | 
						|
See L<http://developer.nvidia.com/attach/3722>.
 | 
						|
 | 
						|
=item CLU
 | 
						|
 | 
						|
In C<CLU>, a comment starts with a procent sign (C<%>), and ends with the
 | 
						|
next newline. See L<ftp://ftp.lcs.mit.edu:/pub/pclu/CLU-syntax.ps> and
 | 
						|
L<http://www.pmg.lcs.mit.edu/CLU.html>.
 | 
						|
 | 
						|
=item COBOL
 | 
						|
 | 
						|
Traditionally, comments in I<COBOL> are indicated by an asterisk in the
 | 
						|
seventh column. This is what the pattern matches. Modern compiler may
 | 
						|
more lenient though. See L<http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm>,
 | 
						|
and L<http://www.csis.ul.ie/cobol/default.htm>.
 | 
						|
 | 
						|
=item CQL
 | 
						|
 | 
						|
Comments in the chess query language (I<CQL>) start with a semi colon
 | 
						|
(C<;>) and last till the end of the line. See L<http://www.rbnn.com/cql/>.
 | 
						|
 | 
						|
=item Crystal Report
 | 
						|
 | 
						|
The formula editor in I<Crystal Reports> uses comments that start
 | 
						|
with C<//>, and end with the end of the line.
 | 
						|
 | 
						|
=item Dylan
 | 
						|
 | 
						|
There are two types of comments in I<Dylan>. They either start with
 | 
						|
C<//>, or are nested comments, delimited with C</*> and C<*/>.
 | 
						|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | 
						|
This pattern requires I<perl 5.6.0> or newer.
 | 
						|
 | 
						|
=item ECMAScript
 | 
						|
 | 
						|
The I<ECMAScript> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment. I<JavaScript> is Netscapes implementation
 | 
						|
of I<ECMAScript>. See
 | 
						|
L<http://www.ecma-international.org/publications/files/ecma-st/Ecma-262.pdf>,
 | 
						|
and L<http://www.ecma-international.org/publications/standards/Ecma-262.htm>.
 | 
						|
 | 
						|
=item Eiffel
 | 
						|
 | 
						|
I<Eiffel> comments start with C<-->, and last till the end of the line.
 | 
						|
 | 
						|
=item False
 | 
						|
 | 
						|
In I<False>, comments start with C<{> and end with C<}>.
 | 
						|
See L<http://wouter.fov120.com/false/false.txt>
 | 
						|
 | 
						|
=item FPL
 | 
						|
 | 
						|
The I<FPL> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment.
 | 
						|
 | 
						|
=item Forth
 | 
						|
 | 
						|
Comments in Forth start with C<\>, and end with the end of the line.
 | 
						|
See also L<http://docs.sun.com/sb/doc/806-1377-10>.
 | 
						|
 | 
						|
=item Fortran
 | 
						|
 | 
						|
There are two forms of I<Fortran>. There's free form I<Fortran>, which
 | 
						|
has comments that start with C<!>, and end at the end of the line.
 | 
						|
The pattern for this is given by C<$RE{Fortran}>. Fixed form I<Fortran>,
 | 
						|
which has been obsoleted, has comments that start with C<C>, C<c> or
 | 
						|
C<*> in the first column, or with C<!> anywhere, but the sixth column.
 | 
						|
The pattern for this are given by C<$RE{Fortran}{fixed}>.
 | 
						|
 | 
						|
See also L<http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/>.
 | 
						|
 | 
						|
=item Funge-98
 | 
						|
 | 
						|
The esotoric language I<Funge-98> uses comments that start and end with
 | 
						|
a C<;>.
 | 
						|
 | 
						|
=item fvwm2
 | 
						|
 | 
						|
Configuration files for I<fvwm2> have comments starting with a
 | 
						|
C<#> and lasting the rest of the line.
 | 
						|
 | 
						|
=item Haifu
 | 
						|
 | 
						|
I<Haifu>, an esotoric language using haikus, has comments starting and
 | 
						|
ending with a C<,>.
 | 
						|
See L<http://www.dangermouse.net/esoteric/haifu.html>.
 | 
						|
 | 
						|
=item Haskell
 | 
						|
 | 
						|
There are two types of comments in I<Haskell>. They either start with
 | 
						|
at least two dashes, or are nested comments, delimited with C<{-> and C<-}>.
 | 
						|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | 
						|
This pattern requires I<perl 5.6.0> or newer.
 | 
						|
 | 
						|
=item HTML
 | 
						|
 | 
						|
In I<HTML>, comments only appear inside a I<comment declaration>.
 | 
						|
A comment declaration starts with a C<E<lt>!>, and ends with a
 | 
						|
C<E<gt>>. Inside this declaration, we have zero or more comments.
 | 
						|
Comments starts with C<--> and end with C<-->, and are optionally
 | 
						|
followed by whitespace. The pattern C<$RE{comment}{HTML}> recognizes
 | 
						|
those comment declarations (and hence more than a comment).
 | 
						|
Note that this is not the same as something that starts with
 | 
						|
C<E<lt>!--> and ends with C<--E<gt>>, because the following will
 | 
						|
be matched completely:
 | 
						|
 | 
						|
    <!--  First  Comment   --
 | 
						|
      --> Second Comment <!--
 | 
						|
      --  Third  Comment   -->
 | 
						|
 | 
						|
Do not be fooled by what your favourite browser thinks is an HTML
 | 
						|
comment.
 | 
						|
 | 
						|
If C<{-keep}> is used, the following are returned:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item $1
 | 
						|
 | 
						|
captures the entire comment declaration.
 | 
						|
 | 
						|
=item $2
 | 
						|
 | 
						|
captures the MDO (markup declaration open), C<E<lt>!>.
 | 
						|
 | 
						|
=item $3
 | 
						|
 | 
						|
captures the content between the MDO and the MDC.
 | 
						|
 | 
						|
=item $4
 | 
						|
 | 
						|
captures the (last) comment, without the surrounding dashes.
 | 
						|
 | 
						|
=item $5
 | 
						|
 | 
						|
captures the MDC (markup declaration close), C<E<gt>>.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=item Hugo
 | 
						|
 | 
						|
There are two types of comments in I<Hugo>. They either start with
 | 
						|
C<!> (which cannot be followed by a C<\>), or are nested comments,
 | 
						|
delimited with C<!\> and C<\!>.
 | 
						|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | 
						|
This pattern requires I<perl 5.6.0> or newer.
 | 
						|
 | 
						|
=item Icon
 | 
						|
 | 
						|
I<Icon> has comments that start with C<#> and end at the next new line.
 | 
						|
See L<http://www.toolsofcomputing.com/IconHandbook/IconHandbook.pdf>,
 | 
						|
L<http://www.cs.arizona.edu/icon/index.htm>, and
 | 
						|
L<http://burks.bton.ac.uk/burks/language/icon/index.htm>.
 | 
						|
 | 
						|
=item ILLGOL
 | 
						|
 | 
						|
The esotoric language I<ILLGOL> uses comments starting with I<NB> and lasting
 | 
						|
till the end of the line.
 | 
						|
See L<http://www.catseye.mb.ca/esoteric/illgol/index.html>.
 | 
						|
 | 
						|
=item INTERCAL
 | 
						|
 | 
						|
Comments in INTERCAL are single line comments. They start with one of
 | 
						|
the keywords C<NOT> or C<N'T>, and can optionally be preceded by the
 | 
						|
keywords C<DO> and C<PLEASE>. If both keywords are used, C<PLEASE>
 | 
						|
precedes C<DO>. Keywords are separated by whitespace.
 | 
						|
 | 
						|
=item J
 | 
						|
 | 
						|
The language I<J> uses comments that start with C<NB.>, and that last till
 | 
						|
the end of the line. See
 | 
						|
L<http://www.jsoftware.com/books/help/primer/contents.htm>, and
 | 
						|
L<http://www.jsoftware.com/>.
 | 
						|
 | 
						|
=item Java
 | 
						|
 | 
						|
The I<Java> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment.
 | 
						|
 | 
						|
=item JavaDoc
 | 
						|
 | 
						|
The I<Javadoc> documentation syntax is demarked with a subset of
 | 
						|
ordinary Java comments to separate it from code.  Comments start with
 | 
						|
C</**> end with C<*/>.  If C<{-keep}> is used, only C<$1> will be set,
 | 
						|
and set to the entire comment. See
 | 
						|
L<http://www.oracle.com/technetwork/java/javase/documentation/index-137868.html#format>.
 | 
						|
 | 
						|
=item JavaScript
 | 
						|
 | 
						|
The I<JavaScript> language has two forms of comments. Comments that start with
 | 
						|
C<//> and last till the end of the line, and comments that start with
 | 
						|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | 
						|
set, and set to the entire comment. I<JavaScript> is Netscapes implementation
 | 
						|
of I<ECMAScript>.
 | 
						|
See L<http://www.mozilla.org/js/language/E262-3.pdf>,
 | 
						|
and L<http://www.mozilla.org/js/language/>.
 | 
						|
 | 
						|
=item LaTeX
 | 
						|
 | 
						|
The documentation language I<LaTeX> uses comments starting with C<%>
 | 
						|
and ending at the end of the line.
 | 
						|
 | 
						|
=item Lisp
 | 
						|
 | 
						|
Comments in I<Lisp> start with a semi-colon (C<;>) and last till the
 | 
						|
end of the line.
 | 
						|
 | 
						|
=item LPC
 | 
						|
 | 
						|
The I<LPC> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
 | 
						|
=item LOGO
 | 
						|
 | 
						|
Comments for the language I<LOGO> start with C<;>, and last till the end
 | 
						|
of the line.
 | 
						|
 | 
						|
=item lua
 | 
						|
 | 
						|
Comments for the I<lua> language start with C<-->, and last till the end
 | 
						|
of the line. See also L<http://www.lua.org/manual/manual.html>.
 | 
						|
 | 
						|
=item M, MUMPS
 | 
						|
 | 
						|
In C<M> (aka C<MUMPS>), comments start with a semi-colon, and last
 | 
						|
till the end of a line. The language specification requires the
 | 
						|
semi-colon to be preceded by one or more I<linestart character>s.
 | 
						|
Those characters default to a space, but that's configurable. This
 | 
						|
requirement, of preceding the comment with linestart characters is
 | 
						|
B<not> tested for. See
 | 
						|
L<ftp://ftp.intersys.com/pub/openm/ism/ism64docs.zip>,
 | 
						|
L<http://mtechnology.intersys.com/mproducts/openm/index.html>, and
 | 
						|
L<http://mcenter.com/mtrc/index.html>.
 | 
						|
 | 
						|
=item m4
 | 
						|
 | 
						|
By default, the preprocessor language I<m4> uses single line comments,
 | 
						|
that start with a C<#> and continue to the end of the line, including
 | 
						|
the newline. The pattern C<$RE {comment} {m4}> matches such comments.
 | 
						|
In I<m4>, it is possible to change the starting token though.
 | 
						|
See L<http://wolfram.schneider.org/bsd/7thEdManVol2/m4/m4.pdf>,
 | 
						|
L<http://www.cs.stir.ac.uk/~kjt/research/pdf/expl-m4.pdf>, and
 | 
						|
L<http://www.gnu.org/software/m4/manual/>.
 | 
						|
 | 
						|
=item Modula-2
 | 
						|
 | 
						|
In C<Modula-2>, comments start with C<(*>, and end with C<*)>. Comments
 | 
						|
may be nested. See L<http://www.modula2.org/>.
 | 
						|
 | 
						|
=item Modula-3
 | 
						|
 | 
						|
In C<Modula-3>, comments start with C<(*>, and end with C<*)>. Comments
 | 
						|
may be nested. See L<http://www.m3.org/>.
 | 
						|
 | 
						|
=item mutt
 | 
						|
 | 
						|
Configuration files for I<mutt> have comments starting with a
 | 
						|
C<#> and lasting the rest of the line.
 | 
						|
 | 
						|
=item Nickle
 | 
						|
 | 
						|
The I<Nickle> language has one line comments starting with C<#>
 | 
						|
(like Perl), or multiline comments delimited by C</*> and C<*/>
 | 
						|
(like C). Under C<-keep>, only C<$1> will be set. See also
 | 
						|
L<http://www.nickle.org>.
 | 
						|
 | 
						|
=item Oberon
 | 
						|
 | 
						|
Comments in I<Oberon> start with C<(*> and end with C<*)>.
 | 
						|
See L<http://www.oberon.ethz.ch/oreport.html>.
 | 
						|
 | 
						|
=item Pascal
 | 
						|
 | 
						|
There are many implementations of Pascal. This modules provides
 | 
						|
pattern for comments of several implementations.
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item C<$RE{comment}{Pascal}>
 | 
						|
 | 
						|
This is the pattern that recognizes comments according to the Pascal ISO
 | 
						|
standard. This standard says that comments start with either C<{>, or
 | 
						|
C<(*>, and end with C<}> or C<*)>. This means that C<{*)> and C<(*}>
 | 
						|
are considered to be comments. Many Pascal applications don't allow this.
 | 
						|
See L<http://www.pascal-central.com/docs/iso10206.txt>
 | 
						|
 | 
						|
=item C<$RE{comment}{Pascal}{Alice}>
 | 
						|
 | 
						|
The I<Alice Pascal> compiler accepts comments that start with C<{>
 | 
						|
and end with C<}>. Comments are not allowed to contain newlines.
 | 
						|
See L<http://www.templetons.com/brad/alice/language/>.
 | 
						|
 | 
						|
=item C<$RE{comment}{Pascal}{Delphi}>, C<$RE{comment}{Pascal}{Free}>
 | 
						|
and C<$RE{comment}{Pascal}{GPC}>
 | 
						|
 | 
						|
The I<Delphi Pascal>, I<Free Pascal> and the I<Gnu Pascal Compiler>
 | 
						|
implementations of Pascal all have comments that either start with
 | 
						|
C<//> and last till the end of the line, are delimited with C<{>
 | 
						|
and C<}> or are delimited with C<(*> and C<*)>. Patterns for those
 | 
						|
comments are given by C<$RE{comment}{Pascal}{Delphi}>,
 | 
						|
C<$RE{comment}{Pascal}{Free}> and C<$RE{comment}{Pascal}{GPC}>
 | 
						|
respectively. These patterns only set C<$1> when C<{-keep}> is used,
 | 
						|
which will then include the entire comment.
 | 
						|
 | 
						|
See L<http://info.borland.com/techpubs/delphi5/oplg/>,
 | 
						|
L<http://www.freepascal.org/docs-html/ref/ref.html> and
 | 
						|
L<http://www.gnu-pascal.de/gpc/>.
 | 
						|
 | 
						|
=item C<$RE{comment}{Pascal}{Workshop}>
 | 
						|
 | 
						|
The I<Workshop Pascal> compiler, from SUN Microsystems, allows comments
 | 
						|
that are delimited with either C<{> and C<}>, delimited with
 | 
						|
C<(*)> and C<*>), delimited with C</*>, and C<*/>, or starting
 | 
						|
and ending with a double quote (C<">). When C<{-keep}> is used,
 | 
						|
only C<$1> is set, and returns the entire comment.
 | 
						|
 | 
						|
See L<http://docs.sun.com/db/doc/802-5762>.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=item PEARL
 | 
						|
 | 
						|
Comments in I<PEARL> start with a C<!> and last till the end of the
 | 
						|
line, or start with C</*> and end with C<*/>. With C<{-keep}>,
 | 
						|
C<$1> will be set to the entire comment.
 | 
						|
 | 
						|
=item PHP
 | 
						|
 | 
						|
Comments in I<PHP> start with either C<#> or C<//> and last till the
 | 
						|
end of the line, or are delimited by C</*> and C<*/>. With C<{-keep}>,
 | 
						|
C<$1> will be set to the entire comment.
 | 
						|
 | 
						|
=item PL/B
 | 
						|
 | 
						|
In I<PL/B>, comments start with either C<.> or C<;>, and end with the
 | 
						|
next newline. See L<http://www.mmcctech.com/pl-b/plb-0010.htm>.
 | 
						|
 | 
						|
=item PL/I
 | 
						|
 | 
						|
The I<PL/I> language has comments starting with C</*> and ending with C<*/>.
 | 
						|
 | 
						|
=item PL/SQL
 | 
						|
 | 
						|
In I<PL/SQL>, comments either start with C<--> and run till the end
 | 
						|
of the line, or start with C</*> and end with C<*/>.
 | 
						|
 | 
						|
=item Perl
 | 
						|
 | 
						|
I<Perl> uses comments that start with a C<#>, and continue till the end
 | 
						|
of the line.
 | 
						|
 | 
						|
=item Portia
 | 
						|
 | 
						|
The I<Portia> programming language has comments that start with C<//>,
 | 
						|
and last till the end of the line.
 | 
						|
 | 
						|
=item Python
 | 
						|
 | 
						|
I<Python> uses comments that start with a C<#>, and continue till the end
 | 
						|
of the line.
 | 
						|
 | 
						|
=item Q-BAL
 | 
						|
 | 
						|
Comments in the I<Q-BAL> language start with C<`> (a backtick), and
 | 
						|
continue till the end of the line.
 | 
						|
 | 
						|
=item QML
 | 
						|
 | 
						|
In C<QML>, comments start with C<#> and last till the end of the line.
 | 
						|
See L<http://www.questionmark.com/uk/qml/overview.doc>.
 | 
						|
 | 
						|
=item R
 | 
						|
 | 
						|
The statistical language I<R> uses comments that start with a C<#> and
 | 
						|
end with the following new line. See L<http://www.r-project.org/>.
 | 
						|
 | 
						|
=item REBOL
 | 
						|
 | 
						|
Comments for the I<REBOL> language start with C<;> and last till the
 | 
						|
end of the line.
 | 
						|
 | 
						|
=item Ruby
 | 
						|
 | 
						|
Comments in I<Ruby> start with C<#> and last till the end of the time.
 | 
						|
 | 
						|
=item Scheme
 | 
						|
 | 
						|
I<Scheme> comments start with C<;>, and last till the end of the line.
 | 
						|
See L<http://schemers.org/>.
 | 
						|
 | 
						|
=item shell
 | 
						|
 | 
						|
Comments in various I<shell>s start with a C<#> and end at the end of
 | 
						|
the line.
 | 
						|
 | 
						|
=item Shelta
 | 
						|
 | 
						|
The esotoric language I<Shelta> uses comments that start and end with
 | 
						|
a C<;>. See L<http://www.catseye.mb.ca/esoteric/shelta/index.html>.
 | 
						|
 | 
						|
=item SLIDE
 | 
						|
 | 
						|
The I<SLIDE> language has two forms of comments. First there is the
 | 
						|
line comment, which starts with a C<#> and includes the rest of the
 | 
						|
line (just like Perl). Second, there is the multiline, nested comment,
 | 
						|
which are delimited by C<(*> and C<*)>. Under C{-keep}>, only
 | 
						|
C<$1> is set, and is set to the entire comment. See
 | 
						|
L<http://www.cs.berkeley.edu/~ug/slide/docs/slide/spec/spec_frame_intro.shtml>.
 | 
						|
 | 
						|
=item slrn
 | 
						|
 | 
						|
Configuration files for I<slrn> have comments starting with a
 | 
						|
C<%> and lasting the rest of the line.
 | 
						|
 | 
						|
=item Smalltalk
 | 
						|
 | 
						|
I<Smalltalk> uses comments that start and end with a double quote, C<">.
 | 
						|
 | 
						|
=item SMITH
 | 
						|
 | 
						|
Comments in the I<SMITH> language start with C<;>, and last till the
 | 
						|
end of the line.
 | 
						|
 | 
						|
=item Squeak
 | 
						|
 | 
						|
In the Smalltalk variant I<Squeak>, comments start and end with
 | 
						|
C<">. Double quotes can appear inside comments by doubling them.
 | 
						|
 | 
						|
=item SQL
 | 
						|
 | 
						|
Standard I<SQL> uses comments starting with two or more dashes, and
 | 
						|
ending at the end of the line.
 | 
						|
 | 
						|
I<MySQL> does not follow the standard. Instead, it allows comments
 | 
						|
that start with a C<#> or C<-- > (that's two dashes and a space)
 | 
						|
ending with the following newline, and comments starting with
 | 
						|
C</*>, and ending with the next C<;> or C<*/> that isn't inside
 | 
						|
single or double quotes. A pattern for this is returned by
 | 
						|
C<$RE{comment}{SQL}{MySQL}>. With C<{-keep}>, only C<$1> will
 | 
						|
be set, and it returns the entire comment.
 | 
						|
 | 
						|
=item Tcl
 | 
						|
 | 
						|
In I<Tcl>, comments start with C<#> and continue till the end of the line.
 | 
						|
 | 
						|
=item TeX
 | 
						|
 | 
						|
The documentation language I<TeX> uses comments starting with C<%>
 | 
						|
and ending at the end of the line.
 | 
						|
 | 
						|
=item troff
 | 
						|
 | 
						|
The document formatting language I<troff> uses comments starting
 | 
						|
with C<\">, and continuing till the end of the line.
 | 
						|
 | 
						|
=item Ubercode
 | 
						|
 | 
						|
The Windows programming language I<Ubercode> uses comments that start with
 | 
						|
C<//> and continue to the end of the line. See L<http://www.ubercode.com>.
 | 
						|
 | 
						|
=item vi
 | 
						|
 | 
						|
In configuration files for the editor I<vi>, one can use comments
 | 
						|
starting with C<">, and ending at the end of the line.
 | 
						|
 | 
						|
=item *W
 | 
						|
 | 
						|
In the language I<*W>, comments start with C<||>, and end with C<!!>.
 | 
						|
 | 
						|
=item zonefile
 | 
						|
 | 
						|
Comments in DNS I<zonefile>s start with C<;>, and continue till the
 | 
						|
end of the line.
 | 
						|
 | 
						|
=item ZZT-OOP
 | 
						|
 | 
						|
The in-game language I<ZZT-OOP> uses comments that start with a C<'>
 | 
						|
character, and end at the following newline. See
 | 
						|
L<http://dave2.rocketjump.org/rad/zzthelp/lang.html>.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 REFERENCES
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item B<[Go 90]>
 | 
						|
 | 
						|
Charles F. Goldfarb: I<The SGML Handbook>. Oxford: Oxford University
 | 
						|
Press. B<1990>. ISBN 0-19-853737-9. Ch. 10.3, pp 390-391.
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
L<Regexp::Common> for a general description of how to use this interface.
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Damian Conway (damian@conway.org)
 | 
						|
 | 
						|
=head1 MAINTENANCE
 | 
						|
 | 
						|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | 
						|
 | 
						|
=head1 BUGS AND IRRITATIONS
 | 
						|
 | 
						|
Bound to be plenty.
 | 
						|
 | 
						|
For a start, there are many common regexes missing.
 | 
						|
Send them in to I<regexp-common@abigail.be>.
 | 
						|
 | 
						|
=head1 LICENSE and COPYRIGHT
 | 
						|
 | 
						|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | 
						|
 | 
						|
This module is free software, and maybe used under any of the following
 | 
						|
licenses:
 | 
						|
 | 
						|
 1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | 
						|
 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | 
						|
 3) The BSD License.               See the file COPYRIGHT.BSD.
 | 
						|
 4) The MIT License.               See the file COPYRIGHT.MIT.
 | 
						|
 | 
						|
=cut
 | 
						|
EOC
 | 
						|
# 2}}}
 | 
						|
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB';   # {{{2
 | 
						|
package Regexp::Common::balanced; {
 | 
						|
 | 
						|
use 5.10.0;
 | 
						|
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
no  warnings 'syntax';
 | 
						|
 | 
						|
use Regexp::Common qw /pattern clean no_defaults/;
 | 
						|
 | 
						|
our $VERSION = '2017060201';
 | 
						|
 | 
						|
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
 | 
						|
my %cache;
 | 
						|
 | 
						|
sub nested {
 | 
						|
    my ($start, $finish) = @_;
 | 
						|
 | 
						|
    return $cache {$start} {$finish} if exists $cache {$start} {$finish};
 | 
						|
 | 
						|
    my @starts   = map {s/\\(.)/$1/g; $_} grep {length}
 | 
						|
                        $start  =~ /([^|\\]+|\\.)+/gs;
 | 
						|
    my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
 | 
						|
                        $finish =~ /([^|\\]+|\\.)+/gs;
 | 
						|
 | 
						|
    push @finishes => ($finishes [-1]) x (@starts - @finishes);
 | 
						|
 | 
						|
    my @re;
 | 
						|
    local $" = "|";
 | 
						|
    foreach my $begin (@starts) {
 | 
						|
        my $end = shift @finishes;
 | 
						|
 | 
						|
        my $qb  = quotemeta $begin;
 | 
						|
        my $qe  = quotemeta $end;
 | 
						|
        my $fb  = quotemeta substr $begin => 0, 1;
 | 
						|
        my $fe  = quotemeta substr $end   => 0, 1;
 | 
						|
 | 
						|
        my $tb  = quotemeta substr $begin => 1;
 | 
						|
        my $te  = quotemeta substr $end   => 1;
 | 
						|
 | 
						|
        my $add;
 | 
						|
        if ($fb eq $fe) {
 | 
						|
            push @re =>
 | 
						|
                   qq /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|(?-1))*$qe)/;
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            my   @clauses =  "(?>[^$fb$fe]+)";
 | 
						|
            push @clauses => "$fb(?!$tb)" if length $tb;
 | 
						|
            push @clauses => "$fe(?!$te)" if length $te;
 | 
						|
            push @clauses => "(?-1)";
 | 
						|
            push @re      =>  qq /(?:$qb(?:@clauses)*$qe)/;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    $cache {$start} {$finish} = qr /(@re)/;
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
pattern name    => [qw /balanced -parens=() -begin= -end=/],
 | 
						|
        create  => sub {
 | 
						|
            my $flag = $_[1];
 | 
						|
            unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
 | 
						|
                    defined $flag -> {-end}   && length $flag -> {-end}) {
 | 
						|
                my @open  = grep {index ($flag->{-parens}, $_) >= 0}
 | 
						|
                             ('[','(','{','<');
 | 
						|
                my @close = map {$closer {$_}} @open;
 | 
						|
                $flag -> {-begin} = join "|" => @open;
 | 
						|
                $flag -> {-end}   = join "|" => @close;
 | 
						|
            }
 | 
						|
            return nested @$flag {qw /-begin -end/};
 | 
						|
        },
 | 
						|
        ;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
1;
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
Regexp::Common::balanced -- provide regexes for strings with balanced
 | 
						|
parenthesized delimiters or arbitrary delimiters.
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
    use Regexp::Common qw /balanced/;
 | 
						|
 | 
						|
    while (<>) {
 | 
						|
        /$RE{balanced}{-parens=>'()'}/
 | 
						|
                                   and print q{balanced parentheses\n};
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
Please consult the manual of L<Regexp::Common> for a general description
 | 
						|
of the works of this interface.
 | 
						|
 | 
						|
Do not use this module directly, but load it via I<Regexp::Common>.
 | 
						|
 | 
						|
=head2 C<$RE{balanced}{-parens}>
 | 
						|
 | 
						|
Returns a pattern that matches a string that starts with the nominated
 | 
						|
opening parenthesis or bracket, contains characters and properly nested
 | 
						|
parenthesized subsequences, and ends in the matching parenthesis.
 | 
						|
 | 
						|
More than one type of parenthesis can be specified:
 | 
						|
 | 
						|
        $RE{balanced}{-parens=>'(){}'}
 | 
						|
 | 
						|
in which case all specified parenthesis types must be correctly balanced within
 | 
						|
the string.
 | 
						|
 | 
						|
Since version 2013030901, C<< $1 >> will always be set (to the entire
 | 
						|
matched substring), regardless whether C<< {-keep} >> is used or not.
 | 
						|
 | 
						|
=head2 C<< $RE{balanced}{-begin => "begin"}{-end => "end"} >>
 | 
						|
 | 
						|
Returns a pattern that matches a string that is properly balanced
 | 
						|
using the I<begin> and I<end> strings as start and end delimiters.
 | 
						|
Multiple sets of begin and end strings can be given by separating
 | 
						|
them by C<|>s (which can be escaped with a backslash).
 | 
						|
 | 
						|
    qr/$RE{balanced}{-begin => "do|if|case"}{-end => "done|fi|esac"}/
 | 
						|
 | 
						|
will match properly balanced strings that either start with I<do> and
 | 
						|
end with I<done>, start with I<if> and end with I<fi>, or start with
 | 
						|
I<case> and end with I<esac>.
 | 
						|
 | 
						|
If I<-end> contains less cases than I<-begin>, the last case of I<-end>
 | 
						|
is repeated. If it contains more cases than I<-begin>, the extra cases
 | 
						|
are ignored. If either of I<-begin> or I<-end> isn't given, or is empty,
 | 
						|
I<< -begin => '(' >> and I<< -end => ')' >> are assumed.
 | 
						|
 | 
						|
Since version 2013030901, C<< $1 >> will always be set (to the entire
 | 
						|
matched substring), regardless whether C<< {-keep} >> is used or not.
 | 
						|
 | 
						|
=head2 Note
 | 
						|
 | 
						|
Since version 2013030901 the pattern will make of the recursive construct
 | 
						|
C<< (?-1) >>, instead of using the problematic C<< (??{ }) >> construct.
 | 
						|
This fixes an problem that was introduced in the 5.17 development track.
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
L<Regexp::Common> for a general description of how to use this interface.
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Damian Conway (damian@conway.org)
 | 
						|
 | 
						|
=head1 MAINTENANCE
 | 
						|
 | 
						|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | 
						|
 | 
						|
=head1 BUGS AND IRRITATIONS
 | 
						|
 | 
						|
Bound to be plenty.
 | 
						|
 | 
						|
For a start, there are many common regexes missing.
 | 
						|
Send them in to I<regexp-common@abigail.be>.
 | 
						|
 | 
						|
=head1 LICENSE and COPYRIGHT
 | 
						|
 | 
						|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | 
						|
 | 
						|
This module is free software, and maybe used under any of the following
 | 
						|
licenses:
 | 
						|
 | 
						|
 1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | 
						|
 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | 
						|
 3) The BSD License.               See the file COPYRIGHT.BSD.
 | 
						|
 4) The MIT License.               See the file COPYRIGHT.MIT.
 | 
						|
 | 
						|
=cut
 | 
						|
EOB
 | 
						|
# 2}}}
 | 
						|
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD';   # {{{2
 | 
						|
package Regexp::Common::delimited;
 | 
						|
 | 
						|
use 5.10.0;
 | 
						|
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
no  warnings 'syntax';
 | 
						|
 | 
						|
use Regexp::Common qw /pattern clean no_defaults/;
 | 
						|
 | 
						|
use charnames ':full';
 | 
						|
 | 
						|
our $VERSION = '2017060201';
 | 
						|
 | 
						|
sub gen_delimited {
 | 
						|
 | 
						|
    my ($dels, $escs, $cdels) = @_;
 | 
						|
    # return '(?:\S*)' unless $dels =~ /\S/;
 | 
						|
    if (defined $escs && length $escs) {
 | 
						|
        $escs  .= substr  ($escs, -1) x (length ($dels) - length  ($escs));
 | 
						|
    }
 | 
						|
    if (defined $cdels && length $cdels) {
 | 
						|
        $cdels .= substr ($cdels, -1) x (length ($dels) - length ($cdels));
 | 
						|
    }
 | 
						|
    else {
 | 
						|
        $cdels = $dels;
 | 
						|
    }
 | 
						|
 | 
						|
    my @pat = ();
 | 
						|
    for (my $i = 0; $i < length $dels; $i ++) {
 | 
						|
        my $del  = quotemeta substr  ($dels, $i, 1);
 | 
						|
        my $cdel = quotemeta substr ($cdels, $i, 1);
 | 
						|
        my $esc  = defined $escs && length ($escs)
 | 
						|
                           ? quotemeta substr ($escs, $i, 1) : "";
 | 
						|
        if ($cdel eq $esc) {
 | 
						|
            push @pat =>
 | 
						|
                "(?k:$del)(?k:[^$cdel]*(?:(?:$cdel$cdel)[^$cdel]*)*)(?k:$cdel)";
 | 
						|
        }
 | 
						|
        elsif (length $esc) {
 | 
						|
            push @pat =>
 | 
						|
                "(?k:$del)(?k:[^$esc$cdel]*(?:$esc.[^$esc$cdel]*)*)(?k:$cdel)";
 | 
						|
        }
 | 
						|
        else {
 | 
						|
            push @pat => "(?k:$del)(?k:[^$cdel]*)(?k:$cdel)";
 | 
						|
        }
 | 
						|
    }
 | 
						|
    my $pat = join '|', @pat;
 | 
						|
    return "(?k:(?|$pat))";
 | 
						|
}
 | 
						|
 | 
						|
sub _croak {
 | 
						|
    require Carp;
 | 
						|
    goto &Carp::croak;
 | 
						|
}
 | 
						|
 | 
						|
pattern name    => [qw( delimited -delim= -esc=\\ -cdelim= )],
 | 
						|
        create  => sub {my $flags = $_[1];
 | 
						|
                        _croak 'Must specify delimiter in $RE{delimited}'
 | 
						|
                              unless length $flags->{-delim};
 | 
						|
                        return gen_delimited (@{$flags}{-delim, -esc, -cdelim});
 | 
						|
                   },
 | 
						|
        ;
 | 
						|
 | 
						|
pattern name    => [qw( quoted -esc=\\ )],
 | 
						|
        create  => sub {my $flags = $_[1];
 | 
						|
                        return gen_delimited (q{"'`}, $flags -> {-esc});
 | 
						|
                   },
 | 
						|
        ;
 | 
						|
 | 
						|
 | 
						|
my @bracket_pairs;
 | 
						|
if ($] >= 5.014) {
 | 
						|
    #
 | 
						|
    # List from http://xahlee.info/comp/unicode_matching_brackets.html
 | 
						|
    #
 | 
						|
    @bracket_pairs =
 | 
						|
        map {ref $_ ? $_ :
 | 
						|
                /!/ ? [(do {my $x = $_; $x =~ s/!/TOP/;    $x},
 | 
						|
                        do {my $x = $_; $x =~ s/!/BOTTOM/; $x})]
 | 
						|
                    : [(do {my $x = $_; $x =~ s/\?/LEFT/;  $x},
 | 
						|
                        do {my $x = $_; $x =~ s/\?/RIGHT/; $x})]}
 | 
						|
            "? PARENTHESIS",
 | 
						|
            "? SQUARE BRACKET",
 | 
						|
            "? CURLY BRACKET",
 | 
						|
            "? DOUBLE QUOTATION MARK",
 | 
						|
            "? SINGLE QUOTATION MARK",
 | 
						|
            "SINGLE ?-POINTING ANGLE QUOTATION MARK",
 | 
						|
            "?-POINTING DOUBLE ANGLE QUOTATION MARK",
 | 
						|
            "FULLWIDTH ? PARENTHESIS",
 | 
						|
            "FULLWIDTH ? SQUARE BRACKET",
 | 
						|
            "FULLWIDTH ? CURLY BRACKET",
 | 
						|
            "FULLWIDTH ? WHITE PARENTHESIS",
 | 
						|
            "? WHITE PARENTHESIS",
 | 
						|
            "? WHITE SQUARE BRACKET",
 | 
						|
            "? WHITE CURLY BRACKET",
 | 
						|
            "? CORNER BRACKET",
 | 
						|
            "? ANGLE BRACKET",
 | 
						|
            "? DOUBLE ANGLE BRACKET",
 | 
						|
            "? BLACK LENTICULAR BRACKET",
 | 
						|
            "? TORTOISE SHELL BRACKET",
 | 
						|
            "? BLACK TORTOISE SHELL BRACKET",
 | 
						|
            "? WHITE CORNER BRACKET",
 | 
						|
            "? WHITE LENTICULAR BRACKET",
 | 
						|
            "? WHITE TORTOISE SHELL BRACKET",
 | 
						|
            "HALFWIDTH ? CORNER BRACKET",
 | 
						|
            "MATHEMATICAL ? WHITE SQUARE BRACKET",
 | 
						|
            "MATHEMATICAL ? ANGLE BRACKET",
 | 
						|
            "MATHEMATICAL ? DOUBLE ANGLE BRACKET",
 | 
						|
            "MATHEMATICAL ? FLATTENED PARENTHESIS",
 | 
						|
            "MATHEMATICAL ? WHITE TORTOISE SHELL BRACKET",
 | 
						|
            "? CEILING",
 | 
						|
            "? FLOOR",
 | 
						|
            "Z NOTATION ? IMAGE BRACKET",
 | 
						|
            "Z NOTATION ? BINDING BRACKET",
 | 
						|
            [   "HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT",
 | 
						|
                "HEAVY SINGLE " .   "COMMA QUOTATION MARK ORNAMENT", ],
 | 
						|
            [   "HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT",
 | 
						|
                "HEAVY DOUBLE " .   "COMMA QUOTATION MARK ORNAMENT", ],
 | 
						|
            "MEDIUM ? PARENTHESIS ORNAMENT",
 | 
						|
            "MEDIUM FLATTENED ? PARENTHESIS ORNAMENT",
 | 
						|
            "MEDIUM ? CURLY BRACKET ORNAMENT",
 | 
						|
            "MEDIUM ?-POINTING ANGLE BRACKET ORNAMENT",
 | 
						|
            "HEAVY ?-POINTING ANGLE QUOTATION MARK ORNAMENT",
 | 
						|
            "HEAVY ?-POINTING ANGLE BRACKET ORNAMENT",
 | 
						|
            "LIGHT ? TORTOISE SHELL BRACKET ORNAMENT",
 | 
						|
            "ORNATE ? PARENTHESIS",
 | 
						|
            "! PARENTHESIS",
 | 
						|
            "! SQUARE BRACKET",
 | 
						|
            "! CURLY BRACKET",
 | 
						|
            "! TORTOISE SHELL BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? CORNER BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? WHITE CORNER BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? TORTOISE SHELL BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? BLACK LENTICULAR BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? WHITE LENTICULAR BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? ANGLE BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? DOUBLE ANGLE BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? SQUARE BRACKET",
 | 
						|
            "PRESENTATION FORM FOR VERTICAL ? CURLY BRACKET",
 | 
						|
            "?-POINTING ANGLE BRACKET",
 | 
						|
            "? ANGLE BRACKET WITH DOT",
 | 
						|
            "?-POINTING CURVED ANGLE BRACKET",
 | 
						|
            "SMALL ? PARENTHESIS",
 | 
						|
            "SMALL ? CURLY BRACKET",
 | 
						|
            "SMALL ? TORTOISE SHELL BRACKET",
 | 
						|
            "SUPERSCRIPT ? PARENTHESIS",
 | 
						|
            "SUBSCRIPT ? PARENTHESIS",
 | 
						|
            "? SQUARE BRACKET WITH UNDERBAR",
 | 
						|
            [    "LEFT SQUARE BRACKET WITH TICK IN TOP CORNER",
 | 
						|
                "RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER", ],
 | 
						|
            [    "LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER",
 | 
						|
                "RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER", ],
 | 
						|
            "? SQUARE BRACKET WITH QUILL",
 | 
						|
            "TOP ? HALF BRACKET",
 | 
						|
            "BOTTOM ? HALF BRACKET",
 | 
						|
            "? S-SHAPED BAG DELIMITER",
 | 
						|
            [    "LEFT ARC LESS-THAN BRACKET",
 | 
						|
                "RIGHT ARC GREATER-THAN BRACKET",  ],
 | 
						|
            [    "DOUBLE LEFT ARC GREATER-THAN BRACKET",
 | 
						|
                "DOUBLE RIGHT ARC LESS-THAN BRACKET",  ],
 | 
						|
            "? SIDEWAYS U BRACKET",
 | 
						|
            "? DOUBLE PARENTHESIS",
 | 
						|
            "? WIGGLY FENCE",
 | 
						|
            "? DOUBLE WIGGLY FENCE",
 | 
						|
            "? LOW PARAPHRASE BRACKET",
 | 
						|
            "? RAISED OMISSION BRACKET",
 | 
						|
            "? SUBSTITUTION BRACKET",
 | 
						|
            "? DOTTED SUBSTITUTION BRACKET",
 | 
						|
            "? TRANSPOSITION BRACKET",
 | 
						|
            [   "OGHAM FEATHER MARK",
 | 
						|
                "OGHAM REVERSED FEATHER MARK",  ],
 | 
						|
            [   "TIBETAN MARK GUG RTAGS GYON",
 | 
						|
                "TIBETAN MARK GUG RTAGS GYAS",  ],
 | 
						|
            [   "TIBETAN MARK ANG KHANG GYON",
 | 
						|
                "TIBETAN MARK ANG KHANG GYAS",  ],
 | 
						|
    ;
 | 
						|
 | 
						|
    #
 | 
						|
    # Filter out unknown characters; this may run on an older version
 | 
						|
    # of Perl with an old version of Unicode.
 | 
						|
    #
 | 
						|
    @bracket_pairs = grep {defined charnames::string_vianame ($$_ [0]) &&
 | 
						|
                           defined charnames::string_vianame ($$_ [1])}
 | 
						|
                     @bracket_pairs;
 | 
						|
 | 
						|
    if (@bracket_pairs) {
 | 
						|
        my  $delims = join "" => map {charnames::string_vianame ($$_ [0])}
 | 
						|
                                     @bracket_pairs;
 | 
						|
        my $cdelims = join "" => map {charnames::string_vianame ($$_ [1])}
 | 
						|
                                     @bracket_pairs;
 | 
						|
 | 
						|
        pattern name   => [qw (bquoted -esc=\\)],
 | 
						|
                create => sub {my $flags = $_ [1];
 | 
						|
                               return gen_delimited ($delims, $flags -> {-esc},
 | 
						|
                                                    $cdelims);
 | 
						|
                          },
 | 
						|
                version => 5.014,
 | 
						|
                ;
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
#
 | 
						|
# Return the Unicode names of the pairs of matching delimiters.
 | 
						|
#
 | 
						|
sub bracket_pairs {@bracket_pairs}
 | 
						|
 | 
						|
1;
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
Regexp::Common::delimited -- provides a regex for delimited strings
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
    use Regexp::Common qw /delimited/;
 | 
						|
 | 
						|
    while (<>) {
 | 
						|
        /$RE{delimited}{-delim=>'"'}/  and print 'a \" delimited string';
 | 
						|
        /$RE{delimited}{-delim=>'/'}/  and print 'a \/ delimited string';
 | 
						|
    }
 | 
						|
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
Please consult the manual of L<Regexp::Common> for a general description
 | 
						|
of the works of this interface.
 | 
						|
 | 
						|
Do not use this module directly, but load it via I<Regexp::Common>.
 | 
						|
 | 
						|
=head2 C<$RE{delimited}{-delim}{-cdelim}{-esc}>
 | 
						|
 | 
						|
Returns a pattern that matches a single-character-delimited substring,
 | 
						|
with optional internal escaping of the delimiter.
 | 
						|
 | 
						|
When C<-delim => I<S>> is specified, each character in the sequence I<S> is
 | 
						|
a possible delimiter. There is no default delimiter, so this flag must always
 | 
						|
be specified.
 | 
						|
 | 
						|
By default, the closing delimiter is the same character as the opening
 | 
						|
delimiter. If this is not wanted, for instance, if you want to match
 | 
						|
a string with symmetric delimiters, you can specify the closing delimiter(s)
 | 
						|
with C<-cdelim => I<S>>. Each character in I<S> is matched with the
 | 
						|
corresponding character supplied with the C<-delim> option. If the C<-cdelim>
 | 
						|
option has less characters than the C<-delim> option, the last character
 | 
						|
is repeated as often as necessary. If the C<-cdelim> option has more
 | 
						|
characters than the C<-delim> option, the extra characters are ignored.
 | 
						|
 | 
						|
If C<-esc => I<S>> is specified, each character in the sequence I<S> is
 | 
						|
the delimiter for the corresponding character in the C<-delim=I<S>> list.
 | 
						|
The default escape is backslash.
 | 
						|
 | 
						|
For example:
 | 
						|
 | 
						|
   $RE{delimited}{-delim=>'"'}               # match "a \" delimited string"
 | 
						|
   $RE{delimited}{-delim=>'"'}{-esc=>'"'}    # match "a "" delimited string"
 | 
						|
   $RE{delimited}{-delim=>'/'}               # match /a \/ delimited string/
 | 
						|
   $RE{delimited}{-delim=>q{'"}}             # match "string" or 'string'
 | 
						|
   $RE{delimited}{-delim=>"("}{-cdelim=>")"} # match (string)
 | 
						|
 | 
						|
Under C<-keep> (See L<Regexp::Common>):
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item $1
 | 
						|
 | 
						|
captures the entire match
 | 
						|
 | 
						|
=item $2
 | 
						|
 | 
						|
captures the opening delimiter
 | 
						|
 | 
						|
=item $3
 | 
						|
 | 
						|
captures delimited portion of the string
 | 
						|
 | 
						|
=item $4
 | 
						|
 | 
						|
captures the closing delimiter
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head2 $RE{quoted}{-esc}
 | 
						|
 | 
						|
A synonym for C<< $RE {delimited} {-delim => q {'"`}} {...} >>.
 | 
						|
 | 
						|
=head2 $RE {bquoted} {-esc}
 | 
						|
 | 
						|
This is a pattern which matches delimited strings, where the delimiters
 | 
						|
are a set of matching brackets. Currently, this comes 85 pairs. This
 | 
						|
includes the 60 pairs of bidirection paired brackets, as listed
 | 
						|
in L<< http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt >>.
 | 
						|
 | 
						|
The other 25 pairs are the quotation marks, the double quotation
 | 
						|
marks, the single and double pointing quoation marks, the heavy
 | 
						|
single and double commas, 4 pairs of top-bottom parenthesis and
 | 
						|
brackets, 9 pairs of presentation form for vertical brackets,
 | 
						|
and the low paraphrase, raised omission, substitution, double
 | 
						|
substitution, and transposition brackets.
 | 
						|
 | 
						|
In a future update, pairs may be added (or deleted).
 | 
						|
 | 
						|
This pattern requires perl 5.14.0 or higher.
 | 
						|
 | 
						|
For a full list of bracket pairs, inspect the output of
 | 
						|
C<< Regexp::Common::delimited::bracket_pair () >>, which returns
 | 
						|
a list of two element arrays, each holding the Unicode names of
 | 
						|
matching pair of delimiters.
 | 
						|
 | 
						|
The C<< {-esc => I<S> } >> works as in the C<< $RE {delimited} >> pattern.
 | 
						|
 | 
						|
If C<< {-keep} >> is given, the following things will be captured:
 | 
						|
 | 
						|
=over 4
 | 
						|
 | 
						|
=item $1
 | 
						|
 | 
						|
captures the entire match
 | 
						|
 | 
						|
=item $2
 | 
						|
 | 
						|
captures the opening delimiter
 | 
						|
 | 
						|
=item $3
 | 
						|
 | 
						|
captures delimited portion of the string
 | 
						|
 | 
						|
=item $4
 | 
						|
 | 
						|
captures the closing delimiter
 | 
						|
 | 
						|
=back
 | 
						|
 | 
						|
=head1 SEE ALSO
 | 
						|
 | 
						|
L<Regexp::Common> for a general description of how to use this interface.
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Damian Conway (damian@conway.org)
 | 
						|
 | 
						|
=head1 MAINTENANCE
 | 
						|
 | 
						|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | 
						|
 | 
						|
=head1 BUGS AND IRRITATIONS
 | 
						|
 | 
						|
Bound to be plenty.
 | 
						|
 | 
						|
For a start, there are many common regexes missing.
 | 
						|
Send them in to I<regexp-common@abigail.be>.
 | 
						|
 | 
						|
=head1 LICENSE and COPYRIGHT
 | 
						|
 | 
						|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | 
						|
 | 
						|
This module is free software, and maybe used under any of the following
 | 
						|
licenses:
 | 
						|
 | 
						|
 1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | 
						|
 2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | 
						|
 3) The BSD License.               See the file COPYRIGHT.BSD.
 | 
						|
 4) The MIT License.               See the file COPYRIGHT.MIT.
 | 
						|
 | 
						|
=cut
 | 
						|
EOD
 | 
						|
# 2}}}
 | 
						|
    my $problems        = 0;
 | 
						|
    $HAVE_Rexexp_Common = 0;
 | 
						|
    my $dir             = "";
 | 
						|
    if ($opt_sdir) {
 | 
						|
        ++$TEMP_OFF;
 | 
						|
        $dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
        File::Path::rmtree($dir) if     is_dir($dir);
 | 
						|
        File::Path::mkpath($dir) unless is_dir($dir);
 | 
						|
    } else {
 | 
						|
        # let File::Temp create a suitable temporary directory
 | 
						|
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | 
						|
        $TEMP_INST{ $dir } = "Regexp::Common";
 | 
						|
    }
 | 
						|
    print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
 | 
						|
    my $Regexp_dir        = "$dir/Regexp";
 | 
						|
    my $Regexp_Common_dir = "$dir/Regexp/Common";
 | 
						|
    mkdir $Regexp_dir       ;
 | 
						|
    mkdir $Regexp_Common_dir;
 | 
						|
 | 
						|
    foreach my $module_file (keys %Regexp_Common_Contents) {
 | 
						|
        my $OUT = open_file('>', "$dir/Regexp/${module_file}.pm", 1);
 | 
						|
        if (defined $OUT) {
 | 
						|
            print $OUT $Regexp_Common_Contents{$module_file};
 | 
						|
            $OUT->close;
 | 
						|
        } else {
 | 
						|
            warn "Failed to install Regexp::${module_file}.pm\n";
 | 
						|
            $problems = 1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    push @INC, $dir;
 | 
						|
    eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
 | 
						|
    $HAVE_Rexexp_Common = 1 unless $problems;
 | 
						|
} # 1}}}
 | 
						|
sub Install_Algorithm_Diff {                 # {{{1
 | 
						|
    # Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a
 | 
						|
    # temporary directory for the duration of this run.
 | 
						|
 | 
						|
my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
 | 
						|
package Algorithm::Diff;
 | 
						|
# Skip to first "=head" line for documentation.
 | 
						|
use strict;
 | 
						|
 | 
						|
use integer;    # see below in _replaceNextLargerWith() for mod to make
 | 
						|
                # if you don't use this
 | 
						|
use vars qw( $VERSION @EXPORT_OK );
 | 
						|
$VERSION = 1.19_02;
 | 
						|
#          ^ ^^ ^^-- Incremented at will
 | 
						|
#          | \+----- Incremented for non-trivial changes to features
 | 
						|
#          \-------- Incremented for fundamental changes
 | 
						|
require Exporter;
 | 
						|
*import    = \&Exporter::import;
 | 
						|
@EXPORT_OK = qw(
 | 
						|
    prepare LCS LCSidx LCS_length
 | 
						|
    diff sdiff compact_diff
 | 
						|
    traverse_sequences traverse_balanced
 | 
						|
);
 | 
						|
 | 
						|
# McIlroy-Hunt diff algorithm
 | 
						|
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
 | 
						|
# by Ned Konz, perl@bike-nomad.com
 | 
						|
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
 | 
						|
 | 
						|
# Create a hash that maps each element of $aCollection to the set of
 | 
						|
# positions it occupies in $aCollection, restricted to the elements
 | 
						|
# within the range of indexes specified by $start and $end.
 | 
						|
# The fourth parameter is a subroutine reference that will be called to
 | 
						|
# generate a string to use as a key.
 | 
						|
# Additional parameters, if any, will be passed to this subroutine.
 | 
						|
#
 | 
						|
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
 | 
						|
 | 
						|
sub _withPositionsOfInInterval
 | 
						|
{
 | 
						|
    my $aCollection = shift;    # array ref
 | 
						|
    my $start       = shift;
 | 
						|
    my $end         = shift;
 | 
						|
    my $keyGen      = shift;
 | 
						|
    my %d;
 | 
						|
    my $index;
 | 
						|
    for ( $index = $start ; $index <= $end ; $index++ )
 | 
						|
    {
 | 
						|
        my $element = $aCollection->[$index];
 | 
						|
        my $key = &$keyGen( $element, @_ );
 | 
						|
        if ( exists( $d{$key} ) )
 | 
						|
        {
 | 
						|
            unshift ( @{ $d{$key} }, $index );
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            $d{$key} = [$index];
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return wantarray ? %d : \%d;
 | 
						|
}
 | 
						|
 | 
						|
# Find the place at which aValue would normally be inserted into the
 | 
						|
# array. If that place is already occupied by aValue, do nothing, and
 | 
						|
# return undef. If the place does not exist (i.e., it is off the end of
 | 
						|
# the array), add it to the end, otherwise replace the element at that
 | 
						|
# point with aValue.  It is assumed that the array's values are numeric.
 | 
						|
# This is where the bulk (75%) of the time is spent in this module, so
 | 
						|
# try to make it fast!
 | 
						|
 | 
						|
sub _replaceNextLargerWith
 | 
						|
{
 | 
						|
    my ( $array, $aValue, $high ) = @_;
 | 
						|
    $high ||= $#$array;
 | 
						|
 | 
						|
    # off the end?
 | 
						|
    if ( $high == -1 || $aValue > $array->[-1] )
 | 
						|
    {
 | 
						|
        push ( @$array, $aValue );
 | 
						|
        return $high + 1;
 | 
						|
    }
 | 
						|
 | 
						|
    # binary search for insertion point...
 | 
						|
    my $low = 0;
 | 
						|
    my $index;
 | 
						|
    my $found;
 | 
						|
    while ( $low <= $high )
 | 
						|
    {
 | 
						|
        $index = ( $high + $low ) / 2;
 | 
						|
 | 
						|
        # $index = int(( $high + $low ) / 2);  # without 'use integer'
 | 
						|
        $found = $array->[$index];
 | 
						|
 | 
						|
        if ( $aValue == $found )
 | 
						|
        {
 | 
						|
            return undef;
 | 
						|
        }
 | 
						|
        elsif ( $aValue > $found )
 | 
						|
        {
 | 
						|
            $low = $index + 1;
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            $high = $index - 1;
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # now insertion point is in $low.
 | 
						|
    $array->[$low] = $aValue;    # overwrite next larger
 | 
						|
    return $low;
 | 
						|
}
 | 
						|
 | 
						|
# This method computes the longest common subsequence in $a and $b.
 | 
						|
 | 
						|
# Result is array or ref, whose contents is such that
 | 
						|
#   $a->[ $i ] == $b->[ $result[ $i ] ]
 | 
						|
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
 | 
						|
 | 
						|
# An additional argument may be passed; this is a hash or key generating
 | 
						|
# function that should return a string that uniquely identifies the given
 | 
						|
# element.  It should be the case that if the key is the same, the elements
 | 
						|
# will compare the same. If this parameter is undef or missing, the key
 | 
						|
# will be the element as a string.
 | 
						|
 | 
						|
# By default, comparisons will use "eq" and elements will be turned into keys
 | 
						|
# using the default stringizing operator '""'.
 | 
						|
 | 
						|
# Additional parameters, if any, will be passed to the key generation
 | 
						|
# routine.
 | 
						|
 | 
						|
sub _longestCommonSubsequence
 | 
						|
{
 | 
						|
    my $a        = shift;    # array ref or hash ref
 | 
						|
    my $b        = shift;    # array ref or hash ref
 | 
						|
    my $counting = shift;    # scalar
 | 
						|
    my $keyGen   = shift;    # code ref
 | 
						|
    my $compare;             # code ref
 | 
						|
 | 
						|
    if ( ref($a) eq 'HASH' )
 | 
						|
    {                        # prepared hash must be in $b
 | 
						|
        my $tmp = $b;
 | 
						|
        $b = $a;
 | 
						|
        $a = $tmp;
 | 
						|
    }
 | 
						|
 | 
						|
    # Check for bogus (non-ref) argument values
 | 
						|
    if ( !ref($a) || !ref($b) )
 | 
						|
    {
 | 
						|
        my @callerInfo = caller(1);
 | 
						|
        die 'error: must pass array or hash references to ' . $callerInfo[3];
 | 
						|
    }
 | 
						|
 | 
						|
    # set up code refs
 | 
						|
    # Note that these are optimized.
 | 
						|
    if ( !defined($keyGen) )    # optimize for strings
 | 
						|
    {
 | 
						|
        $keyGen = sub { $_[0] };
 | 
						|
        $compare = sub { my ( $a, $b ) = @_; $a eq $b };
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        $compare = sub {
 | 
						|
            my $a = shift;
 | 
						|
            my $b = shift;
 | 
						|
            &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
 | 
						|
        };
 | 
						|
    }
 | 
						|
 | 
						|
    my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
 | 
						|
    my ( $prunedCount, $bMatches ) = ( 0, {} );
 | 
						|
 | 
						|
    if ( ref($b) eq 'HASH' )    # was $bMatches prepared for us?
 | 
						|
    {
 | 
						|
        $bMatches = $b;
 | 
						|
    }
 | 
						|
    else
 | 
						|
    {
 | 
						|
        my ( $bStart, $bFinish ) = ( 0, $#$b );
 | 
						|
 | 
						|
        # First we prune off any common elements at the beginning
 | 
						|
        while ( $aStart <= $aFinish
 | 
						|
            and $bStart <= $bFinish
 | 
						|
            and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
 | 
						|
        {
 | 
						|
            $matchVector->[ $aStart++ ] = $bStart++;
 | 
						|
            $prunedCount++;
 | 
						|
        }
 | 
						|
 | 
						|
        # now the end
 | 
						|
        while ( $aStart <= $aFinish
 | 
						|
            and $bStart <= $bFinish
 | 
						|
            and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
 | 
						|
        {
 | 
						|
            $matchVector->[ $aFinish-- ] = $bFinish--;
 | 
						|
            $prunedCount++;
 | 
						|
        }
 | 
						|
 | 
						|
        # Now compute the equivalence classes of positions of elements
 | 
						|
        $bMatches =
 | 
						|
          _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
 | 
						|
    }
 | 
						|
    my $thresh = [];
 | 
						|
    my $links  = [];
 | 
						|
 | 
						|
    my ( $i, $ai, $j, $k );
 | 
						|
    for ( $i = $aStart ; $i <= $aFinish ; $i++ )
 | 
						|
    {
 | 
						|
        $ai = &$keyGen( $a->[$i], @_ );
 | 
						|
        if ( exists( $bMatches->{$ai} ) )
 | 
						|
        {
 | 
						|
            $k = 0;
 | 
						|
            for $j ( @{ $bMatches->{$ai} } )
 | 
						|
            {
 | 
						|
 | 
						|
                # optimization: most of the time this will be true
 | 
						|
                if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
 | 
						|
                {
 | 
						|
                    $thresh->[$k] = $j;
 | 
						|
                }
 | 
						|
                else
 | 
						|
                {
 | 
						|
                    $k = _replaceNextLargerWith( $thresh, $j, $k );
 | 
						|
                }
 | 
						|
 | 
						|
                # oddly, it's faster to always test this (CPU cache?).
 | 
						|
                if ( defined($k) )
 | 
						|
                {
 | 
						|
                    $links->[$k] =
 | 
						|
                      [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
 | 
						|
                }
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    if (@$thresh)
 | 
						|
    {
 | 
						|
        return $prunedCount + @$thresh if $counting;
 | 
						|
        for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
 | 
						|
        {
 | 
						|
            $matchVector->[ $link->[1] ] = $link->[2];
 | 
						|
        }
 | 
						|
    }
 | 
						|
    elsif ($counting)
 | 
						|
    {
 | 
						|
        return $prunedCount;
 | 
						|
    }
 | 
						|
 | 
						|
    return wantarray ? @$matchVector : $matchVector;
 | 
						|
}
 | 
						|
 | 
						|
sub traverse_sequences
 | 
						|
{
 | 
						|
    my $a                 = shift;          # array ref
 | 
						|
    my $b                 = shift;          # array ref
 | 
						|
    my $callbacks         = shift || {};
 | 
						|
    my $keyGen            = shift;
 | 
						|
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 | 
						|
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 | 
						|
    my $finishedACallback = $callbacks->{'A_FINISHED'};
 | 
						|
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 | 
						|
    my $finishedBCallback = $callbacks->{'B_FINISHED'};
 | 
						|
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
 | 
						|
 | 
						|
    # Process all the lines in @$matchVector
 | 
						|
    my $lastA = $#$a;
 | 
						|
    my $lastB = $#$b;
 | 
						|
    my $bi    = 0;
 | 
						|
    my $ai;
 | 
						|
 | 
						|
    for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
 | 
						|
    {
 | 
						|
        my $bLine = $matchVector->[$ai];
 | 
						|
        if ( defined($bLine) )    # matched
 | 
						|
        {
 | 
						|
            &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
 | 
						|
            &$matchCallback( $ai,    $bi++, @_ );
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
            &$discardACallback( $ai, $bi, @_ );
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    # The last entry (if any) processed was a match.
 | 
						|
    # $ai and $bi point just past the last matching lines in their sequences.
 | 
						|
 | 
						|
    while ( $ai <= $lastA or $bi <= $lastB )
 | 
						|
    {
 | 
						|
 | 
						|
        # last A?
 | 
						|
        if ( $ai == $lastA + 1 and $bi <= $lastB )
 | 
						|
        {
 | 
						|
            if ( defined($finishedACallback) )
 | 
						|
            {
 | 
						|
                &$finishedACallback( $lastA, @_ );
 | 
						|
                $finishedACallback = undef;
 | 
						|
            }
 | 
						|
            else
 | 
						|
            {
 | 
						|
                &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        # last B?
 | 
						|
        if ( $bi == $lastB + 1 and $ai <= $lastA )
 | 
						|
        {
 | 
						|
            if ( defined($finishedBCallback) )
 | 
						|
            {
 | 
						|
                &$finishedBCallback( $lastB, @_ );
 | 
						|
                $finishedBCallback = undef;
 | 
						|
            }
 | 
						|
            else
 | 
						|
            {
 | 
						|
                &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
 | 
						|
        &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
 | 
						|
    }
 | 
						|
 | 
						|
    return 1;
 | 
						|
}
 | 
						|
 | 
						|
sub traverse_balanced
 | 
						|
{
 | 
						|
    my $a                 = shift;              # array ref
 | 
						|
    my $b                 = shift;              # array ref
 | 
						|
    my $callbacks         = shift || {};
 | 
						|
    my $keyGen            = shift;
 | 
						|
    my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 | 
						|
    my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 | 
						|
    my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 | 
						|
    my $changeCallback    = $callbacks->{'CHANGE'};
 | 
						|
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
 | 
						|
 | 
						|
    # Process all the lines in match vector
 | 
						|
    my $lastA = $#$a;
 | 
						|
    my $lastB = $#$b;
 | 
						|
    my $bi    = 0;
 | 
						|
    my $ai    = 0;
 | 
						|
    my $ma    = -1;
 | 
						|
    my $mb;
 | 
						|
 | 
						|
    while (1)
 | 
						|
    {
 | 
						|
 | 
						|
        # Find next match indices $ma and $mb
 | 
						|
        do {
 | 
						|
            $ma++;
 | 
						|
        } while(
 | 
						|
                $ma <= $#$matchVector
 | 
						|
            &&  !defined $matchVector->[$ma]
 | 
						|
        );
 | 
						|
 | 
						|
        last if $ma > $#$matchVector;    # end of matchVector?
 | 
						|
        $mb = $matchVector->[$ma];
 | 
						|
 | 
						|
        # Proceed with discard a/b or change events until
 | 
						|
        # next match
 | 
						|
        while ( $ai < $ma || $bi < $mb )
 | 
						|
        {
 | 
						|
 | 
						|
            if ( $ai < $ma && $bi < $mb )
 | 
						|
            {
 | 
						|
 | 
						|
                # Change
 | 
						|
                if ( defined $changeCallback )
 | 
						|
                {
 | 
						|
                    &$changeCallback( $ai++, $bi++, @_ );
 | 
						|
                }
 | 
						|
                else
 | 
						|
                {
 | 
						|
                    &$discardACallback( $ai++, $bi, @_ );
 | 
						|
                    &$discardBCallback( $ai, $bi++, @_ );
 | 
						|
                }
 | 
						|
            }
 | 
						|
            elsif ( $ai < $ma )
 | 
						|
            {
 | 
						|
                &$discardACallback( $ai++, $bi, @_ );
 | 
						|
            }
 | 
						|
            else
 | 
						|
            {
 | 
						|
 | 
						|
                # $bi < $mb
 | 
						|
                &$discardBCallback( $ai, $bi++, @_ );
 | 
						|
            }
 | 
						|
        }
 | 
						|
 | 
						|
        # Match
 | 
						|
        &$matchCallback( $ai++, $bi++, @_ );
 | 
						|
    }
 | 
						|
 | 
						|
    while ( $ai <= $lastA || $bi <= $lastB )
 | 
						|
    {
 | 
						|
        if ( $ai <= $lastA && $bi <= $lastB )
 | 
						|
        {
 | 
						|
 | 
						|
            # Change
 | 
						|
            if ( defined $changeCallback )
 | 
						|
            {
 | 
						|
                &$changeCallback( $ai++, $bi++, @_ );
 | 
						|
            }
 | 
						|
            else
 | 
						|
            {
 | 
						|
                &$discardACallback( $ai++, $bi, @_ );
 | 
						|
                &$discardBCallback( $ai, $bi++, @_ );
 | 
						|
            }
 | 
						|
        }
 | 
						|
        elsif ( $ai <= $lastA )
 | 
						|
        {
 | 
						|
            &$discardACallback( $ai++, $bi, @_ );
 | 
						|
        }
 | 
						|
        else
 | 
						|
        {
 | 
						|
 | 
						|
            # $bi <= $lastB
 | 
						|
            &$discardBCallback( $ai, $bi++, @_ );
 | 
						|
        }
 | 
						|
    }
 | 
						|
 | 
						|
    return 1;
 | 
						|
}
 | 
						|
 | 
						|
sub prepare
 | 
						|
{
 | 
						|
    my $a       = shift;    # array ref
 | 
						|
    my $keyGen  = shift;    # code ref
 | 
						|
 | 
						|
    # set up code ref
 | 
						|
    $keyGen = sub { $_[0] } unless defined($keyGen);
 | 
						|
 | 
						|
    return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
 | 
						|
}
 | 
						|
 | 
						|
sub LCS
 | 
						|
{
 | 
						|
    my $a = shift;                  # array ref
 | 
						|
    my $b = shift;                  # array ref or hash ref
 | 
						|
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
 | 
						|
    my @retval;
 | 
						|
    my $i;
 | 
						|
    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
 | 
						|
    {
 | 
						|
        if ( defined( $matchVector->[$i] ) )
 | 
						|
        {
 | 
						|
            push ( @retval, $a->[$i] );
 | 
						|
        }
 | 
						|
    }
 | 
						|
    return wantarray ? @retval : \@retval;
 | 
						|
}
 | 
						|
 | 
						|
sub LCS_length
 | 
						|
{
 | 
						|
    my $a = shift;                          # array ref
 | 
						|
    my $b = shift;                          # array ref or hash ref
 | 
						|
    return _longestCommonSubsequence( $a, $b, 1, @_ );
 | 
						|
}
 | 
						|
 | 
						|
sub LCSidx
 | 
						|
{
 | 
						|
    my $a= shift @_;
 | 
						|
    my $b= shift @_;
 | 
						|
    my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
 | 
						|
    my @am= grep defined $match->[$_], 0..$#$match;
 | 
						|
    my @bm= @{$match}[@am];
 | 
						|
    return \@am, \@bm;
 | 
						|
}
 | 
						|
 | 
						|
sub compact_diff
 | 
						|
{
 | 
						|
    my $a= shift @_;
 | 
						|
    my $b= shift @_;
 | 
						|
    my( $am, $bm )= LCSidx( $a, $b, @_ );
 | 
						|
    my @cdiff;
 | 
						|
    my( $ai, $bi )= ( 0, 0 );
 | 
						|
    push @cdiff, $ai, $bi;
 | 
						|
    while( 1 ) {
 | 
						|
        while(  @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0]  ) {
 | 
						|
            shift @$am;
 | 
						|
            shift @$bm;
 | 
						|
            ++$ai, ++$bi;
 | 
						|
        }
 | 
						|
        push @cdiff, $ai, $bi;
 | 
						|
        last   if  ! @$am;
 | 
						|
        $ai = $am->[0];
 | 
						|
        $bi = $bm->[0];
 | 
						|
        push @cdiff, $ai, $bi;
 | 
						|
    }
 | 
						|
    push @cdiff, 0+@$a, 0+@$b
 | 
						|
        if  $ai < @$a || $bi < @$b;
 | 
						|
    return wantarray ? @cdiff : \@cdiff;
 | 
						|
}
 | 
						|
 | 
						|
sub diff
 | 
						|
{
 | 
						|
    my $a      = shift;    # array ref
 | 
						|
    my $b      = shift;    # array ref
 | 
						|
    my $retval = [];
 | 
						|
    my $hunk   = [];
 | 
						|
    my $discard = sub {
 | 
						|
        push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
 | 
						|
    };
 | 
						|
    my $add = sub {
 | 
						|
        push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
 | 
						|
    };
 | 
						|
    my $match = sub {
 | 
						|
        push @$retval, $hunk
 | 
						|
            if 0 < @$hunk;
 | 
						|
        $hunk = []
 | 
						|
    };
 | 
						|
    traverse_sequences( $a, $b,
 | 
						|
        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
 | 
						|
    &$match();
 | 
						|
    return wantarray ? @$retval : $retval;
 | 
						|
}
 | 
						|
 | 
						|
sub sdiff
 | 
						|
{
 | 
						|
    my $a      = shift;    # array ref
 | 
						|
    my $b      = shift;    # array ref
 | 
						|
    my $retval = [];
 | 
						|
    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
 | 
						|
    my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
 | 
						|
    my $change = sub {
 | 
						|
        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 | 
						|
    };
 | 
						|
    my $match = sub {
 | 
						|
        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 | 
						|
    };
 | 
						|
    traverse_balanced(
 | 
						|
        $a,
 | 
						|
        $b,
 | 
						|
        {
 | 
						|
            MATCH     => $match,
 | 
						|
            DISCARD_A => $discard,
 | 
						|
            DISCARD_B => $add,
 | 
						|
            CHANGE    => $change,
 | 
						|
        },
 | 
						|
        @_
 | 
						|
    );
 | 
						|
    return wantarray ? @$retval : $retval;
 | 
						|
}
 | 
						|
 | 
						|
########################################
 | 
						|
my $Root= __PACKAGE__;
 | 
						|
package Algorithm::Diff::_impl;
 | 
						|
use strict;
 | 
						|
 | 
						|
sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
 | 
						|
            # 1   # $me->[1]: Ref to first sequence
 | 
						|
            # 2   # $me->[2]: Ref to second sequence
 | 
						|
sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
 | 
						|
sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
 | 
						|
sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
 | 
						|
sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
 | 
						|
sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
 | 
						|
sub _Min() { -2 } # Added to _Off to get min instead of max+1
 | 
						|
 | 
						|
sub Die
 | 
						|
{
 | 
						|
    require Carp;
 | 
						|
    Carp::confess( @_ );
 | 
						|
}
 | 
						|
 | 
						|
sub _ChkPos
 | 
						|
{
 | 
						|
    my( $me )= @_;
 | 
						|
    return   if  $me->[_Pos];
 | 
						|
    my $meth= ( caller(1) )[3];
 | 
						|
    Die( "Called $meth on 'reset' object" );
 | 
						|
}
 | 
						|
 | 
						|
sub _ChkSeq
 | 
						|
{
 | 
						|
    my( $me, $seq )= @_;
 | 
						|
    return $seq + $me->[_Off]
 | 
						|
        if  1 == $seq  ||  2 == $seq;
 | 
						|
    my $meth= ( caller(1) )[3];
 | 
						|
    Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
 | 
						|
}
 | 
						|
 | 
						|
sub getObjPkg
 | 
						|
{
 | 
						|
    my( $us )= @_;
 | 
						|
    return ref $us   if  ref $us;
 | 
						|
    return $us . "::_obj";
 | 
						|
}
 | 
						|
 | 
						|
sub new
 | 
						|
{
 | 
						|
    my( $us, $seq1, $seq2, $opts ) = @_;
 | 
						|
    my @args;
 | 
						|
    for( $opts->{keyGen} ) {
 | 
						|
        push @args, $_   if  $_;
 | 
						|
    }
 | 
						|
    for( $opts->{keyGenArgs} ) {
 | 
						|
        push @args, @$_   if  $_;
 | 
						|
    }
 | 
						|
    my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
 | 
						|
    my $same= 1;
 | 
						|
    if(  0 == $cdif->[2]  &&  0 == $cdif->[3]  ) {
 | 
						|
        $same= 0;
 | 
						|
        splice @$cdif, 0, 2;
 | 
						|
    }
 | 
						|
    my @obj= ( $cdif, $seq1, $seq2 );
 | 
						|
    $obj[_End] = (1+@$cdif)/2;
 | 
						|
    $obj[_Same] = $same;
 | 
						|
    $obj[_Base] = 0;
 | 
						|
    my $me = bless \@obj, $us->getObjPkg();
 | 
						|
    $me->Reset( 0 );
 | 
						|
    return $me;
 | 
						|
}
 | 
						|
 | 
						|
sub Reset
 | 
						|
{
 | 
						|
    my( $me, $pos )= @_;
 | 
						|
    $pos= int( $pos || 0 );
 | 
						|
    $pos += $me->[_End]
 | 
						|
        if  $pos < 0;
 | 
						|
    $pos= 0
 | 
						|
        if  $pos < 0  ||  $me->[_End] <= $pos;
 | 
						|
    $me->[_Pos]= $pos || !1;
 | 
						|
    $me->[_Off]= 2*$pos - 1;
 | 
						|
    return $me;
 | 
						|
}
 | 
						|
 | 
						|
sub Base
 | 
						|
{
 | 
						|
    my( $me, $base )= @_;
 | 
						|
    my $oldBase= $me->[_Base];
 | 
						|
    $me->[_Base]= 0+$base   if  defined $base;
 | 
						|
    return $oldBase;
 | 
						|
}
 | 
						|
 | 
						|
sub Copy
 | 
						|
{
 | 
						|
    my( $me, $pos, $base )= @_;
 | 
						|
    my @obj= @$me;
 | 
						|
    my $you= bless \@obj, ref($me);
 | 
						|
    $you->Reset( $pos )   if  defined $pos;
 | 
						|
    $you->Base( $base );
 | 
						|
    return $you;
 | 
						|
}
 | 
						|
 | 
						|
sub Next {
 | 
						|
    my( $me, $steps )= @_;
 | 
						|
    $steps= 1   if  ! defined $steps;
 | 
						|
    if( $steps ) {
 | 
						|
        my $pos= $me->[_Pos];
 | 
						|
        my $new= $pos + $steps;
 | 
						|
        $new= 0   if  $pos  &&  $new < 0;
 | 
						|
        $me->Reset( $new )
 | 
						|
    }
 | 
						|
    return $me->[_Pos];
 | 
						|
}
 | 
						|
 | 
						|
sub Prev {
 | 
						|
    my( $me, $steps )= @_;
 | 
						|
    $steps= 1   if  ! defined $steps;
 | 
						|
    my $pos= $me->Next(-$steps);
 | 
						|
    $pos -= $me->[_End]   if  $pos;
 | 
						|
    return $pos;
 | 
						|
}
 | 
						|
 | 
						|
sub Diff {
 | 
						|
    my( $me )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    return 0   if  $me->[_Same] == ( 1 & $me->[_Pos] );
 | 
						|
    my $ret= 0;
 | 
						|
    my $off= $me->[_Off];
 | 
						|
    for my $seq ( 1, 2 ) {
 | 
						|
        $ret |= $seq
 | 
						|
            if  $me->[_Idx][ $off + $seq + _Min ]
 | 
						|
            <   $me->[_Idx][ $off + $seq ];
 | 
						|
    }
 | 
						|
    return $ret;
 | 
						|
}
 | 
						|
 | 
						|
sub Min {
 | 
						|
    my( $me, $seq, $base )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    my $off= $me->_ChkSeq($seq);
 | 
						|
    $base= $me->[_Base] if !defined $base;
 | 
						|
    return $base + $me->[_Idx][ $off + _Min ];
 | 
						|
}
 | 
						|
 | 
						|
sub Max {
 | 
						|
    my( $me, $seq, $base )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    my $off= $me->_ChkSeq($seq);
 | 
						|
    $base= $me->[_Base] if !defined $base;
 | 
						|
    return $base + $me->[_Idx][ $off ] -1;
 | 
						|
}
 | 
						|
 | 
						|
sub Range {
 | 
						|
    my( $me, $seq, $base )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    my $off = $me->_ChkSeq($seq);
 | 
						|
    if( !wantarray ) {
 | 
						|
        return  $me->[_Idx][ $off ]
 | 
						|
            -   $me->[_Idx][ $off + _Min ];
 | 
						|
    }
 | 
						|
    $base= $me->[_Base] if !defined $base;
 | 
						|
    return  ( $base + $me->[_Idx][ $off + _Min ] )
 | 
						|
        ..  ( $base + $me->[_Idx][ $off ] - 1 );
 | 
						|
}
 | 
						|
 | 
						|
sub Items {
 | 
						|
    my( $me, $seq )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    my $off = $me->_ChkSeq($seq);
 | 
						|
    if( !wantarray ) {
 | 
						|
        return  $me->[_Idx][ $off ]
 | 
						|
            -   $me->[_Idx][ $off + _Min ];
 | 
						|
    }
 | 
						|
    return
 | 
						|
        @{$me->[$seq]}[
 | 
						|
                $me->[_Idx][ $off + _Min ]
 | 
						|
            ..  ( $me->[_Idx][ $off ] - 1 )
 | 
						|
        ];
 | 
						|
}
 | 
						|
 | 
						|
sub Same {
 | 
						|
    my( $me )= @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    return wantarray ? () : 0
 | 
						|
        if  $me->[_Same] != ( 1 & $me->[_Pos] );
 | 
						|
    return $me->Items(1);
 | 
						|
}
 | 
						|
 | 
						|
my %getName;
 | 
						|
BEGIN {
 | 
						|
    %getName= (
 | 
						|
        same => \&Same,
 | 
						|
        diff => \&Diff,
 | 
						|
        base => \&Base,
 | 
						|
        min  => \&Min,
 | 
						|
        max  => \&Max,
 | 
						|
        range=> \&Range,
 | 
						|
        items=> \&Items, # same thing
 | 
						|
    );
 | 
						|
}
 | 
						|
 | 
						|
sub Get
 | 
						|
{
 | 
						|
    my $me= shift @_;
 | 
						|
    $me->_ChkPos();
 | 
						|
    my @value;
 | 
						|
    for my $arg (  @_  ) {
 | 
						|
        for my $word (  split ' ', $arg  ) {
 | 
						|
            my $meth;
 | 
						|
            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
 | 
						|
                ||  not  $meth= $getName{ lc $2 }
 | 
						|
            ) {
 | 
						|
                Die( $Root, ", Get: Invalid request ($word)" );
 | 
						|
            }
 | 
						|
            my( $base, $name, $seq )= ( $1, $2, $3 );
 | 
						|
            push @value, scalar(
 | 
						|
                4 == length($name)
 | 
						|
                    ? $meth->( $me )
 | 
						|
                    : $meth->( $me, $seq, $base )
 | 
						|
            );
 | 
						|
        }
 | 
						|
    }
 | 
						|
    if(  wantarray  ) {
 | 
						|
        return @value;
 | 
						|
    } elsif(  1 == @value  ) {
 | 
						|
        return $value[0];
 | 
						|
    }
 | 
						|
    Die( 0+@value, " values requested from ",
 | 
						|
        $Root, "'s Get in scalar context" );
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
my $Obj= getObjPkg($Root);
 | 
						|
no strict 'refs';
 | 
						|
 | 
						|
for my $meth (  qw( new getObjPkg )  ) {
 | 
						|
    *{$Root."::".$meth} = \&{$meth};
 | 
						|
    *{$Obj ."::".$meth} = \&{$meth};
 | 
						|
}
 | 
						|
for my $meth (  qw(
 | 
						|
    Next Prev Reset Copy Base Diff
 | 
						|
    Same Items Range Min Max Get
 | 
						|
    _ChkPos _ChkSeq
 | 
						|
)  ) {
 | 
						|
    *{$Obj."::".$meth} = \&{$meth};
 | 
						|
}
 | 
						|
 | 
						|
1;
 | 
						|
# This version released by Tye McQueen (http://perlmonks.org/?node=tye).
 | 
						|
#
 | 
						|
# =head1 LICENSE
 | 
						|
#
 | 
						|
# Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
 | 
						|
# Parts by Tye McQueen.
 | 
						|
#
 | 
						|
# This program is free software; you can redistribute it and/or modify it
 | 
						|
# under the same terms as Perl.
 | 
						|
#
 | 
						|
# =head1 MAILING LIST
 | 
						|
#
 | 
						|
# Mark-Jason still maintains a mailing list.  To join a low-volume mailing
 | 
						|
# list for announcements related to diff and Algorithm::Diff, send an
 | 
						|
# empty mail message to mjd-perl-diff-request@plover.com.
 | 
						|
# =head1 CREDITS
 | 
						|
#
 | 
						|
# Versions through 0.59 (and much of this documentation) were written by:
 | 
						|
#
 | 
						|
# Mark-Jason Dominus, mjd-perl-diff@plover.com
 | 
						|
#
 | 
						|
# This version borrows some documentation and routine names from
 | 
						|
# Mark-Jason's, but Diff.pm's code was completely replaced.
 | 
						|
#
 | 
						|
# This code was adapted from the Smalltalk code of Mario Wolczko
 | 
						|
# <mario@wolczko.com>, which is available at
 | 
						|
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
 | 
						|
#
 | 
						|
# C<sdiff> and C<traverse_balanced> were written by Mike Schilli
 | 
						|
# <m@perlmeister.com>.
 | 
						|
#
 | 
						|
# The algorithm is that described in
 | 
						|
# I<A Fast Algorithm for Computing Longest Common Subsequences>,
 | 
						|
# CACM, vol.20, no.5, pp.350-353, May 1977, with a few
 | 
						|
# minor improvements to improve the speed.
 | 
						|
#
 | 
						|
# Much work was done by Ned Konz (perl@bike-nomad.com).
 | 
						|
#
 | 
						|
# The OO interface and some other changes are by Tye McQueen.
 | 
						|
#
 | 
						|
EOAlgDiff
 | 
						|
# 2}}}
 | 
						|
    my $problems        = 0;
 | 
						|
    $HAVE_Algorithm_Diff = 0;
 | 
						|
    my $dir             = "";
 | 
						|
    if ($opt_sdir) {
 | 
						|
        ++$TEMP_OFF;
 | 
						|
        $dir = "$opt_sdir/$TEMP_OFF";
 | 
						|
        File::Path::rmtree($dir) if     is_dir($dir);
 | 
						|
        File::Path::mkpath($dir) unless is_dir($dir);
 | 
						|
    } else {
 | 
						|
        # let File::Temp create a suitable temporary directory
 | 
						|
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | 
						|
        $TEMP_INST{ $dir } = "Algorithm::Diff";
 | 
						|
    }
 | 
						|
    print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
 | 
						|
    my $Algorithm_dir      = "$dir/Algorithm";
 | 
						|
    my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
 | 
						|
    mkdir $Algorithm_dir     ;
 | 
						|
    mkdir $Algorithm_Diff_dir;
 | 
						|
 | 
						|
    my $OUT = open_file('>', "$dir/Algorithm/Diff.pm", 1);
 | 
						|
    if (defined $OUT) {
 | 
						|
        print $OUT $Algorithm_Diff_Contents;
 | 
						|
        $OUT->close;
 | 
						|
    } else {
 | 
						|
        warn "Failed to install Algorithm/Diff.pm\n";
 | 
						|
        $problems = 1;
 | 
						|
    }
 | 
						|
 | 
						|
    push @INC, $dir;  # between this & Regexp::Common only need to do once
 | 
						|
    eval "use Algorithm::Diff qw / sdiff /";
 | 
						|
    $HAVE_Algorithm_Diff = 1 unless $problems;
 | 
						|
} # 1}}}
 | 
						|
__END__
 | 
						|
mode values (stat $item)[2]
 | 
						|
       Unix    Windows
 | 
						|
file:  33188   33206
 | 
						|
dir :  16832   16895
 | 
						|
link:  33261   33206
 | 
						|
pipe:   4544    null
 |