18207 lines
		
	
	
		
			731 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			18207 lines
		
	
	
		
			731 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env perl
 | |
| # cloc -- Count Lines of Code                  {{{1
 | |
| # Copyright (C) 2006-2024 Al Danial <al.danial@gmail.com>
 | |
| # First release August 2006
 | |
| #
 | |
| # Includes code from:
 | |
| #   - SLOCCount v2.26
 | |
| #     http://www.dwheeler.com/sloccount/
 | |
| #     by David Wheeler.
 | |
| #   - Regexp::Common v2017060201
 | |
| #     https://metacpan.org/pod/Regexp::Common
 | |
| #     by Damian Conway and Abigail.
 | |
| #   - Win32::Autoglob 1.01
 | |
| #     https://metacpan.org/pod/Win32::Autoglob
 | |
| #     by Sean M. Burke.
 | |
| #   - Algorithm::Diff 1.1902
 | |
| #     https://metacpan.org/pod/Algorithm::Diff
 | |
| #     by Tye McQueen.
 | |
| #
 | |
| # This program is free software; you can redistribute it and/or modify
 | |
| # it under the terms of the GNU General Public License as published by
 | |
| # the Free Software Foundation; either version 2 of the License, or
 | |
| # (at your option) any later version.
 | |
| #
 | |
| # This program is distributed in the hope that it will be useful,
 | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
| # GNU General Public License for more details:
 | |
| # <http://www.gnu.org/licenses/gpl.txt>.
 | |
| #
 | |
| # 1}}}
 | |
| my $VERSION = "2.01";  # odd number == beta; even number == stable
 | |
| my $URL     = "github.com/AlDanial/cloc";  # 'https://' pushes header too wide
 | |
| require 5.10.0;
 | |
| # use modules                                  {{{1
 | |
| use warnings;
 | |
| use strict;
 | |
| 
 | |
| use Getopt::Long;
 | |
| use File::Basename;
 | |
| use File::Temp qw { tempfile tempdir };
 | |
| use File::Find;
 | |
| use File::Path;
 | |
| use File::Spec;
 | |
| use IO::File;
 | |
| use List::Util qw( min max );
 | |
| use Cwd;
 | |
| use POSIX qw { strftime ceil};
 | |
| # Parallel::ForkManager isn't in the standard distribution.
 | |
| # Use it only if installed, and only if --processes=N is given.
 | |
| # The module load happens in get_max_processes().
 | |
| my $HAVE_Parallel_ForkManager = 0;
 | |
| 
 | |
| # Digest::MD5 isn't in the standard distribution. Use it only if installed.
 | |
| my $HAVE_Digest_MD5 = 0;
 | |
| eval "use Digest::MD5;";
 | |
| if (defined $Digest::MD5::VERSION) {
 | |
|     $HAVE_Digest_MD5 = 1;
 | |
| } else {
 | |
|     warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
 | |
| }
 | |
| 
 | |
| # Time::HiRes became standard with Perl 5.8
 | |
| my $HAVE_Time_HiRes = 0;
 | |
| eval "use Time::HiRes;";
 | |
| $HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;
 | |
| 
 | |
| my $HAVE_Rexexp_Common;
 | |
| # Regexp::Common isn't in the standard distribution.  It will
 | |
| # be installed in a temp directory if necessary.
 | |
| eval "use Regexp::Common qw ( comment ) ";
 | |
| if (defined $Regexp::Common::VERSION) {
 | |
|     $HAVE_Rexexp_Common = 1;
 | |
| } else {
 | |
|     $HAVE_Rexexp_Common = 0;
 | |
| }
 | |
| 
 | |
| my $HAVE_Algorithm_Diff = 0;
 | |
| # Algorithm::Diff isn't in the standard distribution.  It will
 | |
| # be installed in a temp directory if necessary.
 | |
| eval "use Algorithm::Diff qw ( sdiff ) ";
 | |
| if (defined $Algorithm::Diff::VERSION) {
 | |
|     $HAVE_Algorithm_Diff = 1;
 | |
| } else {
 | |
|     Install_Algorithm_Diff();
 | |
| }
 | |
| 
 | |
| # print "2 HAVE_Algorithm_Diff = $HAVE_Algorithm_Diff\n";
 | |
| # test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
 | |
| # die "Hre=$HAVE_Rexexp_Common  Had=$HAVE_Algorithm_Diff";
 | |
| 
 | |
| # Uncomment next two lines when building Windows executable with perl2exe
 | |
| # or if running on a system that already has Regexp::Common.
 | |
| #use Regexp::Common;
 | |
| #$HAVE_Rexexp_Common = 1;
 | |
| 
 | |
| #perl2exe_include "Regexp/Common/whitespace.pm"
 | |
| #perl2exe_include "Regexp/Common/URI.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/fax.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/file.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/ftp.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/gopher.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/http.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/pop.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/prospero.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/news.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/tel.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/telnet.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/tv.pm"
 | |
| #perl2exe_include "Regexp/Common/URI/wais.pm"
 | |
| #perl2exe_include "Regexp/Common/CC.pm"
 | |
| #perl2exe_include "Regexp/Common/SEN.pm"
 | |
| #perl2exe_include "Regexp/Common/number.pm"
 | |
| #perl2exe_include "Regexp/Common/delimited.pm"
 | |
| #perl2exe_include "Regexp/Common/profanity.pm"
 | |
| #perl2exe_include "Regexp/Common/net.pm"
 | |
| #perl2exe_include "Regexp/Common/zip.pm"
 | |
| #perl2exe_include "Regexp/Common/comment.pm"
 | |
| #perl2exe_include "Regexp/Common/balanced.pm"
 | |
| #perl2exe_include "Regexp/Common/lingua.pm"
 | |
| #perl2exe_include "Regexp/Common/list.pm"
 | |
| #perl2exe_include "File/Glob.pm"
 | |
| 
 | |
| use Text::Tabs qw { expand };
 | |
| use Cwd qw { cwd };
 | |
| use File::Glob;
 | |
| # 1}}}
 | |
| # Usage information, options processing.       {{{1
 | |
| my $ON_WINDOWS = 0;
 | |
|    $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
 | |
| if ($ON_WINDOWS and $ENV{'SHELL'}) {
 | |
|     if ($ENV{'SHELL'} =~ m{^/}) {
 | |
|         $ON_WINDOWS = 0;  # make Cygwin look like Unix
 | |
|     } else {
 | |
|         $ON_WINDOWS = 1;  # MKS defines $SHELL but still acts like Windows
 | |
|     }
 | |
| }
 | |
| 
 | |
| my $HAVE_Win32_Long_Path = 0;
 | |
| # Win32::LongPath is an optional dependency that when available on
 | |
| # Windows will be used to support reading files past the 255 char
 | |
| # path length limit.
 | |
| if ($ON_WINDOWS) {
 | |
|     eval "use Win32::LongPath;";
 | |
|     if (defined $Win32::LongPath::VERSION) {
 | |
|         $HAVE_Win32_Long_Path = 1;
 | |
|     }
 | |
| }
 | |
| my $config_file = '';
 | |
| if ( $ENV{'HOME'} ) {
 | |
|     $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt');
 | |
| } elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) {
 | |
|     $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc');
 | |
| }
 | |
| # $config_file may be updated by check_alternate_config_files()
 | |
| 
 | |
| my $NN     = chr(27) . "[0m";  # normal
 | |
|    $NN     = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT:  is it a terminal?
 | |
| my $BB     = chr(27) . "[1m";  # bold
 | |
|    $BB     = "" if $ON_WINDOWS or !(-t STDOUT);
 | |
| my $script = basename $0;
 | |
| 
 | |
| #  Intended for v1.88:
 | |
| #  --git-diff-simindex       Git diff strategy #3:  use git's similarity index
 | |
| #                            (git diff -M --name-status) to identify file pairs
 | |
| #                            to compare.  This is especially useful to compare
 | |
| #                            files that were renamed between the commits.
 | |
| 
 | |
| my $brief_usage  = "
 | |
|                        cloc -- Count Lines of Code
 | |
| 
 | |
| Usage:
 | |
|     $script [options] <file(s)/dir(s)/git hash(es)>
 | |
|         Count physical lines of source code and comments in the given files
 | |
|         (may be archives such as compressed tarballs or zip files) and/or
 | |
|         recursively below the given directories or git commit hashes.
 | |
|         Example:    cloc src/ include/ main.c
 | |
| 
 | |
|     $script [options] --diff <set1>  <set2>
 | |
|         Compute differences of physical lines of source code and comments
 | |
|         between any pairwise combination of directory names, archive
 | |
|         files or git commit hashes.
 | |
|         Example:    cloc --diff Python-3.5.tar.xz python-3.6/
 | |
| 
 | |
| $script --help  shows full documentation on the options.
 | |
| https://$URL has numerous examples and more information.
 | |
| ";
 | |
| my $usage  = "
 | |
| Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>
 | |
| 
 | |
|  Count, or compute differences of, physical lines of source code in the
 | |
|  given files (may be archives such as compressed tarballs or zip files,
 | |
|  or git commit hashes or branch names) and/or recursively below the
 | |
|  given directories.
 | |
| 
 | |
|  ${BB}Input Options${NN}
 | |
|    --extract-with=<cmd>      This option is only needed if cloc is unable
 | |
|                              to figure out how to extract the contents of
 | |
|                              the input file(s) by itself.
 | |
|                              Use <cmd> to extract binary archive files (e.g.:
 | |
|                              .tar.gz, .zip, .Z).  Use the literal '>FILE<' as
 | |
|                              a stand-in for the actual file(s) to be
 | |
|                              extracted.  For example, to count lines of code
 | |
|                              in the input files
 | |
|                                 gcc-4.2.tar.gz  perl-5.8.8.tar.gz
 | |
|                              on Unix use
 | |
|                                --extract-with='gzip -dc >FILE< | tar xf -'
 | |
|                              or, if you have GNU tar,
 | |
|                                --extract-with='tar zxf >FILE<'
 | |
|                              and on Windows use, for example:
 | |
|                                --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
 | |
|                              (if WinZip is installed there).
 | |
|    --list-file=<file>        Take the list of file and/or directory names to
 | |
|                              process from <file>, which has one file/directory
 | |
|                              name per line.  Only exact matches are counted;
 | |
|                              relative path names will be resolved starting from
 | |
|                              the directory where cloc is invoked.  Set <file>
 | |
|                              to - to read file names from a STDIN pipe.
 | |
|                              See also --exclude-list-file, --config.
 | |
|    --diff-list-file=<file>   Take the pairs of file names to be diff'ed from
 | |
|                              <file>, whose format matches the output of
 | |
|                              --diff-alignment.  (Run with that option to
 | |
|                              see a sample.)  The language identifier at the
 | |
|                              end of each line is ignored.  This enables --diff
 | |
|                              mode and bypasses file pair alignment logic.
 | |
|                              Use --diff-list-files to define the file name
 | |
|                              pairs in separate files. See also --config.
 | |
|    --diff-list-files <file1> <file2>
 | |
|                              Compute differences in code and comments between
 | |
|                              the files and directories listed in <file1> and
 | |
|                              <file2>.  Each input file should use the same
 | |
|                              format as --list-file, where there is one file or
 | |
|                              directory name per line.  Only exact matches are
 | |
|                              counted; relative path names will be resolved
 | |
|                              starting from the directory where cloc is invoked.
 | |
|                              This enables --diff mode.  See also --list-file,
 | |
|                              --diff-list-file, --diff.
 | |
|    --vcs=<VCS>               Invoke a system call to <VCS> to obtain a list of
 | |
|                              files to work on.  If <VCS> is 'git', then will
 | |
|                              invoke 'git ls-files' to get a file list and
 | |
|                              'git submodule status' to get a list of submodules
 | |
|                              whose contents will be ignored.  See also --git
 | |
|                              which accepts git commit hashes and branch names.
 | |
|                              If <VCS> is 'svn' then will invoke 'svn list -R'.
 | |
|                              The primary benefit is that cloc will then skip
 | |
|                              files explicitly excluded by the versioning tool
 | |
|                              in question, ie, those in .gitignore or have the
 | |
|                              svn:ignore property.
 | |
|                              Alternatively <VCS> may be any system command
 | |
|                              that generates a list of files.
 | |
|                              Note:  cloc must be in a directory which can read
 | |
|                              the files as they are returned by <VCS>.  cloc will
 | |
|                              not download files from remote repositories.
 | |
|                              'svn list -R' may refer to a remote repository
 | |
|                              to obtain file names (and therefore may require
 | |
|                              authentication to the remote repository), but
 | |
|                              the files themselves must be local.
 | |
|                              Setting <VCS> to 'auto' selects between 'git'
 | |
|                              and 'svn' (or neither) depending on the presence
 | |
|                              of a .git or .svn subdirectory below the directory
 | |
|                              where cloc is invoked.
 | |
|    --unicode                 Check binary files to see if they contain Unicode
 | |
|                              expanded ASCII text.  This causes performance to
 | |
|                              drop noticeably.
 | |
| 
 | |
|  ${BB}Processing Options${NN}
 | |
|    --autoconf                Count .in files (as processed by GNU autoconf) of
 | |
|                              recognized languages.  See also --no-autogen.
 | |
|    --by-file                 Report results for every source file encountered.
 | |
|                              See also --fmt under 'Output Options'.
 | |
|    --by-file-by-lang         Report results for every source file encountered
 | |
|                              in addition to reporting by language.
 | |
|    --config <file>           Read command line switches from <file> instead of
 | |
|                              the default location of $config_file.
 | |
|                              The file should contain one switch, along with
 | |
|                              arguments (if any), per line.  Blank lines and lines
 | |
|                              beginning with '#' are skipped.  Options given on
 | |
|                              the command line take priority over entries read from
 | |
|                              the file.
 | |
|                              If a directory is also given with any of these
 | |
|                              switches: --list-file, --exclude-list-file,
 | |
|                              --read-lang-def, --force-lang-def, --diff-list-file
 | |
|                              and a config file exists in that directory, it will
 | |
|                              take priority over $config_file.
 | |
|    --count-and-diff <set1> <set2>
 | |
|                              First perform direct code counts of source file(s)
 | |
|                              of <set1> and <set2> separately, then perform a diff
 | |
|                              of these.  Inputs may be pairs of files, directories,
 | |
|                              or archives.  If --out or --report-file is given,
 | |
|                              three output files will be created, one for each
 | |
|                              of the two counts and one for the diff.  See also
 | |
|                              --diff, --diff-alignment, --diff-timeout,
 | |
|                              --ignore-case, --ignore-whitespace.
 | |
|    --diff <set1> <set2>      Compute differences in code and comments between
 | |
|                              source file(s) of <set1> and <set2>.  The inputs
 | |
|                              may be any mix of files, directories, archives,
 | |
|                              or git commit hashes.  Use --diff-alignment to
 | |
|                              generate a list showing which file pairs where
 | |
|                              compared.  When comparing git branches, only files
 | |
|                              which have changed in either commit are compared.
 | |
|                              See also --git, --count-and-diff, --diff-alignment,
 | |
|                              --diff-list-file, --diff-timeout, --ignore-case,
 | |
|                              --ignore-whitespace.
 | |
|    --diff-timeout <N>        Ignore files which take more than <N> seconds
 | |
|                              to process.  Default is 10 seconds.  Setting <N>
 | |
|                              to 0 allows unlimited time.  (Large files with many
 | |
|                              repeated lines can cause Algorithm::Diff::sdiff()
 | |
|                              to take hours.) See also --timeout.
 | |
|    --docstring-as-code       cloc considers docstrings to be comments, but this is
 | |
|                              not always correct as docstrings represent regular
 | |
|                              strings when they appear on the right hand side of an
 | |
|                              assignment or as function arguments.  This switch
 | |
|                              forces docstrings to be counted as code.
 | |
|    --follow-links            [Unix only] Follow symbolic links to directories
 | |
|                              (sym links to files are always followed).
 | |
|                              See also --stat.
 | |
|    --force-lang=<lang>[,<ext>]
 | |
|                              Process all files that have a <ext> extension
 | |
|                              with the counter for language <lang>.  For
 | |
|                              example, to count all .f files with the
 | |
|                              Fortran 90 counter (which expects files to
 | |
|                              end with .f90) instead of the default Fortran 77
 | |
|                              counter, use
 | |
|                                --force-lang=\"Fortran 90,f\"
 | |
|                              If <ext> is omitted, every file will be counted
 | |
|                              with the <lang> counter.  This option can be
 | |
|                              specified multiple times (but that is only
 | |
|                              useful when <ext> is given each time).
 | |
|                              See also --script-lang, --lang-no-ext.
 | |
|    --force-lang-def=<file>   Load language processing filters from <file>,
 | |
|                              then use these filters instead of the built-in
 | |
|                              filters.  Note:  languages which map to the same
 | |
|                              file extension (for example:
 | |
|                              MATLAB/Mathematica/Objective-C/MUMPS/Mercury;
 | |
|                              Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
 | |
|                              will be ignored as these require additional
 | |
|                              processing that is not expressed in language
 | |
|                              definition files.  Use --read-lang-def to define
 | |
|                              new language filters without replacing built-in
 | |
|                              filters (see also --write-lang-def,
 | |
|                              --write-lang-def-incl-dup, --config).
 | |
|    --git                     Forces the inputs to be interpreted as git targets
 | |
|                              (commit hashes, branch names, et cetera) if these
 | |
|                              are not first identified as file or directory
 | |
|                              names.  This option overrides the --vcs=git logic
 | |
|                              if this is given; in other words, --git gets its
 | |
|                              list of files to work on directly from git using
 | |
|                              the hash or branch name rather than from
 | |
|                              'git ls-files'.  This option can be used with
 | |
|                              --diff to perform line count diffs between git
 | |
|                              commits, or between a git commit and a file,
 | |
|                              directory, or archive.  Use -v/--verbose to see
 | |
|                              the git system commands cloc issues.
 | |
|    --git-diff-rel            Same as --git --diff, or just --diff if the inputs
 | |
|                              are recognized as git targets.  Only files which
 | |
|                              have changed in either commit are compared.
 | |
|    --git-diff-all            Git diff strategy #2:  compare all files in the
 | |
|                              repository between the two commits.
 | |
|    --ignore-whitespace       Ignore horizontal white space when comparing files
 | |
|                              with --diff.  See also --ignore-case.
 | |
|    --ignore-case             Ignore changes in case within file contents;
 | |
|                              consider upper- and lowercase letters equivalent
 | |
|                              when comparing files with --diff.  See also
 | |
|                              --ignore-whitespace.
 | |
|    --ignore-case-ext         Ignore case of file name extensions.  This will
 | |
|                              cause problems counting some languages
 | |
|                              (specifically, .c and .C are associated with C and
 | |
|                              C++; this switch would count .C files as C rather
 | |
|                              than C++ on *nix operating systems).  File name
 | |
|                              case insensitivity is always true on Windows.
 | |
|    --lang-no-ext=<lang>      Count files without extensions using the <lang>
 | |
|                              counter.  This option overrides internal logic
 | |
|                              for files without extensions (where such files
 | |
|                              are checked against known scripting languages
 | |
|                              by examining the first line for #!).  See also
 | |
|                              --force-lang, --script-lang.
 | |
|    --max-file-size=<MB>      Skip files larger than <MB> megabytes when
 | |
|                              traversing directories.  By default, <MB>=100.
 | |
|                              cloc's memory requirement is roughly twenty times
 | |
|                              larger than the largest file so running with
 | |
|                              files larger than 100 MB on a computer with less
 | |
|                              than 2 GB of memory will cause problems.
 | |
|                              Note:  this check does not apply to files
 | |
|                              explicitly passed as command line arguments.
 | |
|    --no-autogen[=list]       Ignore files generated by code-production systems
 | |
|                              such as GNU autoconf.  To see a list of these files
 | |
|                              (then exit), run with --no-autogen list
 | |
|                              See also --autoconf.
 | |
|    --no-recurse              Count files in the given directories without
 | |
|                              recursively descending below them.
 | |
|    --original-dir            [Only effective in combination with
 | |
|                              --strip-comments or --strip-code]  Write the stripped
 | |
|                              files to the same directory as the original files.
 | |
|    --only-count-files        Only count files by language.  Blank, comment, and
 | |
|                              code counts will be zero.
 | |
|    --read-binary-files       Process binary files in addition to text files.
 | |
|                              This is usually a bad idea and should only be
 | |
|                              attempted with text files that have embedded
 | |
|                              binary data.
 | |
|    --read-lang-def=<file>    Load new language processing filters from <file>
 | |
|                              and merge them with those already known to cloc.
 | |
|                              If <file> defines a language cloc already knows
 | |
|                              about, cloc's definition will take precedence.
 | |
|                              Use --force-lang-def to over-ride cloc's
 | |
|                              definitions (see also --write-lang-def,
 | |
|                              --write-lang-def-incl-dup, --config).
 | |
|    --script-lang=<lang>,<s>  Process all files that invoke <s> as a #!
 | |
|                              scripting language with the counter for language
 | |
|                              <lang>.  For example, files that begin with
 | |
|                                 #!/usr/local/bin/perl5.8.8
 | |
|                              will be counted with the Perl counter by using
 | |
|                                 --script-lang=Perl,perl5.8.8
 | |
|                              The language name is case insensitive but the
 | |
|                              name of the script language executable, <s>,
 | |
|                              must have the right case.  This option can be
 | |
|                              specified multiple times.  See also --force-lang,
 | |
|                              --lang-no-ext.
 | |
|    --sdir=<dir>              Use <dir> as the scratch directory instead of
 | |
|                              letting File::Temp chose the location.  Files
 | |
|                              written to this location are not removed at
 | |
|                              the end of the run (as they are with File::Temp).
 | |
|    --skip-leading=<N[,ext]>  Skip the first <N> lines of each file.  If a
 | |
|                              comma separated list of extensions is also given,
 | |
|                              only skip lines from those file types.  Example:
 | |
|                                --skip-leading=10,cpp,h
 | |
|                              will skip the first ten lines of *.cpp and *.h
 | |
|                              files.  This is useful for ignoring boilerplate
 | |
|                              text.
 | |
|    --skip-uniqueness         Skip the file uniqueness check.  This will give
 | |
|                              a performance boost at the expense of counting
 | |
|                              files with identical contents multiple times
 | |
|                              (if such duplicates exist).
 | |
|    --stat                    Some file systems (AFS, CD-ROM, FAT, HPFS, SMB)
 | |
|                              do not have directory 'nlink' counts that match
 | |
|                              the number of its subdirectories.  Consequently
 | |
|                              cloc may undercount or completely skip the
 | |
|                              contents of such file systems.  This switch forces
 | |
|                              File::Find to stat directories to obtain the
 | |
|                              correct count.  File search speed will decrease.
 | |
|                              See also --follow-links.
 | |
|    --stdin-name=<file>       Give a file name to use to determine the language
 | |
|                              for standard input.  (Use - as the input name to
 | |
|                              receive source code via STDIN.)
 | |
|    --strip-code=<ext>        For each file processed, write to the current
 | |
|                              directory a version of the file which has blank
 | |
|                              and code lines, including code with (in-line
 | |
|                              comments) removed. The name of each stripped file
 | |
|                              is the original file name with .<ext> appended to
 | |
|                              it. It is written to the current directory unless
 | |
|                              --original-dir is on.
 | |
|    --strip-comments=<ext>    For each file processed, write to the current
 | |
|                              directory a version of the file which has blank
 | |
|                              and commented lines removed (in-line comments
 | |
|                              persist).  The name of each stripped file is the
 | |
|                              original file name with .<ext> appended to it.
 | |
|                              It is written to the current directory unless
 | |
|                              --original-dir is on.
 | |
|    --strip-str-comments      Replace comment markers embedded in strings with
 | |
|                              'xx'.  This attempts to work around a limitation
 | |
|                              in Regexp::Common::Comment where comment markers
 | |
|                              embedded in strings are seen as actual comment
 | |
|                              markers and not strings, often resulting in a
 | |
|                              'Complex regular subexpression recursion limit'
 | |
|                              warning and incorrect counts.  There are two
 | |
|                              disadvantages to using this switch:  1/code count
 | |
|                              performance drops, and 2/code generated with
 | |
|                              --strip-comments will contain different strings
 | |
|                              where ever embedded comments are found.
 | |
|    --sum-reports             Input arguments are report files previously
 | |
|                              created with the --report-file option in plain
 | |
|                              format (eg. not JSON, YAML, XML, or SQL).
 | |
|                              Makes a cumulative set of results containing the
 | |
|                              sum of data from the individual report files.
 | |
|    --timeout <N>             Ignore files which take more than <N> seconds
 | |
|                              to process at any of the language's filter stages.
 | |
|                              The default maximum number of seconds spent on a
 | |
|                              filter stage is the number of lines in the file
 | |
|                              divided by one thousand.  Setting <N> to 0 allows
 | |
|                              unlimited time.  See also --diff-timeout.
 | |
|    --processes=NUM           [Available only on systems with a recent version
 | |
|                              of the Parallel::ForkManager module.  Not
 | |
|                              available on Windows.] Sets the maximum number of
 | |
|                              cores that cloc uses.  The default value of 0
 | |
|                              disables multiprocessing.
 | |
|    --unix                    Override the operating system autodetection
 | |
|                              logic and run in UNIX mode.  See also
 | |
|                              --windows, --show-os.
 | |
|    --use-sloccount           If SLOCCount is installed, use its compiled
 | |
|                              executables c_count, java_count, pascal_count,
 | |
|                              php_count, and xml_count instead of cloc's
 | |
|                              counters.  SLOCCount's compiled counters are
 | |
|                              substantially faster than cloc's and may give
 | |
|                              a performance improvement when counting projects
 | |
|                              with large files.  However, these cloc-specific
 | |
|                              features will not be available: --diff,
 | |
|                              --count-and-diff, --strip-code, --strip-comments,
 | |
|                              --unicode.
 | |
|    --windows                 Override the operating system autodetection
 | |
|                              logic and run in Microsoft Windows mode.
 | |
|                              See also --unix, --show-os.
 | |
| 
 | |
|  ${BB}Filter Options${NN}
 | |
|    --include-content=<regex> Only count files containing text that matches the
 | |
|                              given regular expression.
 | |
|    --exclude-content=<regex> Exclude files containing text that matches the given
 | |
|                              regular expression.
 | |
|    --exclude-dir=<D1>[,D2,]  Exclude the given comma separated directories
 | |
|                              D1, D2, D3, et cetera, from being scanned.  For
 | |
|                              example  --exclude-dir=.cache,test  will skip
 | |
|                              all files and subdirectories that have /.cache/
 | |
|                              or /test/ as their parent directory.
 | |
|                              Directories named .bzr, .cvs, .hg, .git, .svn,
 | |
|                              and .snapshot are always excluded.
 | |
|                              This option only works with individual directory
 | |
|                              names so including file path separators is not
 | |
|                              allowed.  Use --fullpath and --not-match-d=<regex>
 | |
|                              to supply a regex matching multiple subdirectories.
 | |
|    --exclude-ext=<ext1>[,<ext2>[...]]
 | |
|                              Do not count files having the given file name
 | |
|                              extensions.
 | |
|    --exclude-lang=<L1>[,L2[...]]
 | |
|                              Exclude the given comma separated languages
 | |
|                              L1, L2, L3, et cetera, from being counted.
 | |
|    --exclude-list-file=<file>  Ignore files and/or directories whose names
 | |
|                              appear in <file>.  <file> should have one file
 | |
|                              name per line.  Only exact matches are ignored;
 | |
|                              relative path names will be resolved starting from
 | |
|                              the directory where cloc is invoked.
 | |
|                              See also --list-file, --config.
 | |
|    --fullpath                Modifies the behavior of --match-f, --not-match-f,
 | |
|                              and --not-match-d to include the file's path--
 | |
|                              relative to the directory from which cloc is
 | |
|                              invoked--in the regex, not just the file's basename.
 | |
|                              (This does not expand each filename to include its
 | |
|                              fully qualified absolute path; instead, it uses as
 | |
|                              much of the path as is passed in to cloc.)
 | |
|    --include-ext=<ext1>[,ext2[...]]
 | |
|                              Count only languages having the given comma
 | |
|                              separated file extensions.  Use --show-ext to
 | |
|                              see the recognized extensions.
 | |
|    --include-lang=<L1>[,L2[...]]
 | |
|                              Count only the given comma separated, case-
 | |
|                              insensitive languages L1, L2, L3, et cetera.  Use
 | |
|                              --show-lang to see the list of recognized languages.
 | |
|    --match-d=<regex>         Only count files in directories matching the Perl
 | |
|                              regex.  For example
 | |
|                                --match-d='/(src|include)/'
 | |
|                              only counts files in directories containing
 | |
|                              /src/ or /include/.  Unlike --not-match-d,
 | |
|                              --match-f, and --not-match-f, --match-d always
 | |
|                              anchors the regex to the directory from which
 | |
|                              cloc is invoked.
 | |
|    --not-match-d=<regex>     Count all files except those in directories
 | |
|                              matching the Perl regex.  Only the trailing
 | |
|                              directory name is compared, for example, when
 | |
|                              counting in /usr/local/lib, only 'lib' is
 | |
|                              compared to the regex.
 | |
|                              Add --fullpath to compare parent directories, beginning
 | |
|                              from the directory where cloc is invoked, to the regex.
 | |
|                              Do not include file path separators at the beginning
 | |
|                              or end of the regex. This option may be repeated.
 | |
|    --match-f=<regex>         Only count files whose basenames match the Perl
 | |
|                              regex.  For example
 | |
|                                --match-f='^[Ww]idget'
 | |
|                              only counts files that start with Widget or widget.
 | |
|                              Add --fullpath to include parent directories
 | |
|                              in the regex instead of just the basename.
 | |
|    --not-match-f=<regex>     Count all files except those whose basenames
 | |
|                              match the Perl regex.  Add --fullpath to include
 | |
|                              parent directories in the regex instead of just
 | |
|                              the basename. This option may be repeated.
 | |
|    --skip-archive=<regex>    Ignore files that end with the given Perl regular
 | |
|                              expression.  For example, if given
 | |
|                                --skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
 | |
|                              the code will skip files that end with .zip,
 | |
|                              .tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
 | |
|                              .tar.7z.
 | |
|    --skip-win-hidden         On Windows, ignore hidden files.
 | |
| 
 | |
|  ${BB}Debug Options${NN}
 | |
|    --categorized=<file>      Save file sizes in bytes, identified languages
 | |
|                              and names of categorized files to <file>.
 | |
|    --counted=<file>          Save names of processed source files to <file>.
 | |
|    --diff-alignment=<file>   Write to <file> a list of files and file pairs
 | |
|                              showing which files were added, removed, and/or
 | |
|                              compared during a run with --diff.  This switch
 | |
|                              forces the --diff mode on.
 | |
|    --explain=<lang>          Print the filters used to remove comments for
 | |
|                              language <lang> and exit.  In some cases the
 | |
|                              filters refer to Perl subroutines rather than
 | |
|                              regular expressions.  An examination of the
 | |
|                              source code may be needed for further explanation.
 | |
|    --help                    Print this usage information and exit.
 | |
|    --found=<file>            Save names of every file found to <file>.
 | |
|    --ignored=<file>          Save names of ignored files and the reason they
 | |
|                              were ignored to <file>.
 | |
|    --print-filter-stages     Print processed source code before and after
 | |
|                              each filter is applied.
 | |
|    --show-ext[=<ext>]        Print information about all known (or just the
 | |
|                              given) file extensions and exit.
 | |
|    --show-lang[=<lang>]      Print information about all known (or just the
 | |
|                              given) languages and exit.
 | |
|    --show-os                 Print the value of the operating system mode
 | |
|                              and exit.  See also --unix, --windows.
 | |
|    -v[=<n>]                  Verbose switch (optional numeric value).
 | |
|    -verbose[=<n>]            Long form of -v.
 | |
|    --version                 Print the version of this program and exit.
 | |
|    --write-lang-def=<file>   Writes to <file> the language processing filters
 | |
|                              then exits.  Useful as a first step to creating
 | |
|                              custom language definitions. Note: languages which
 | |
|                              map to the same file extension will be excluded.
 | |
|                              (See also --force-lang-def, --read-lang-def).
 | |
|    --write-lang-def-incl-dup=<file>
 | |
|                              Same as --write-lang-def, but includes duplicated
 | |
|                              extensions.  This generates a problematic language
 | |
|                              definition file because cloc will refuse to use
 | |
|                              it until duplicates are removed.
 | |
| 
 | |
|  ${BB}Output Options${NN}
 | |
|    --3                       Print third-generation language output.
 | |
|                              (This option can cause report summation to fail
 | |
|                              if some reports were produced with this option
 | |
|                              while others were produced without it.)
 | |
|    --by-percent  X           Instead of comment and blank line counts, show
 | |
|                              these values as percentages based on the value
 | |
|                              of X in the denominator, where X is
 | |
|                                  c    meaning lines of code
 | |
|                                  cm   meaning lines of code + comments
 | |
|                                  cb   meaning lines of code + blanks
 | |
|                                  cmb  meaning lines of code + comments + blanks
 | |
|                              For example, if using method 'c' and your code
 | |
|                              has twice as many lines of comments as lines
 | |
|                              of code, the value in the comment column will
 | |
|                              be 200%.  The code column remains a line count.
 | |
|    --csv                     Write the results as comma separated values.
 | |
|    --csv-delimiter=<C>       Use the character <C> as the delimiter for comma
 | |
|                              separated files instead of ,.  This switch forces
 | |
|    --file-encoding=<E>       Write output files using the <E> encoding instead of
 | |
|                              the default ASCII (<E> = 'UTF-7').  Examples: 'UTF-16',
 | |
|                              'euc-kr', 'iso-8859-16'.  Known encodings can be
 | |
|                              printed with
 | |
|                                perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"'
 | |
|    --fmt=<N>                 Alternate text output format where <N> is a number
 | |
|                              from 1 to 5, or -1 to -5. 'total lines' means the
 | |
|                              sum of code, comment, and blank lines.  Negative
 | |
|                              values are the same as the positive values but retain,
 | |
|                              instead of deleting, the intermediate JSON file that
 | |
|                              is written.  The JSON file name is randomly generated
 | |
|                              unless --out/--report-file is given.  The formats are:
 | |
|                                1:  by language (same as cloc default output)
 | |
|                                2:  by language with an extra column for total lines
 | |
|                                3:  by file with language
 | |
|                                4:  by file with a total lines column
 | |
|                                5:  by file with language and a total lines column
 | |
|    --hide-rate               Do not show elapsed time, line processing rate, or
 | |
|                              file processing rates in the output header. This
 | |
|                              makes output deterministic.
 | |
|    --json                    Write the results as JavaScript Object Notation
 | |
|                              (JSON) formatted output.
 | |
|    --md                      Write the results as Markdown-formatted text.
 | |
|    --out=<file>              Synonym for --report-file=<file>.
 | |
|    --progress-rate=<n>       Show progress update after every <n> files are
 | |
|                              processed (default <n>=100).  Set <n> to 0 to
 | |
|                              suppress progress output (useful when redirecting
 | |
|                              output to STDOUT).
 | |
|    --quiet                   Suppress all information messages except for
 | |
|                              the final report.
 | |
|    --report-file=<file>      Write the results to <file> instead of STDOUT.
 | |
|    --summary-cutoff=X:N      Aggregate to 'Other' results having X lines
 | |
|                              below N where X is one of
 | |
|                                 c   meaning lines of code
 | |
|                                 f   meaning files
 | |
|                                 m   meaning lines of comments
 | |
|                                 cm  meaning lines of code + comments
 | |
|                              Appending a percent sign to N changes
 | |
|                              the calculation from straight count to
 | |
|                              percentage.
 | |
|                              Ignored with --diff or --by-file.
 | |
|    --sql=<file>              Write results as SQL create and insert statements
 | |
|                              which can be read by a database program such as
 | |
|                              SQLite.  If <file> is -, output is sent to STDOUT.
 | |
|    --sql-append              Append SQL insert statements to the file specified
 | |
|                              by --sql and do not generate table creation
 | |
|                              statements.  Only valid with the --sql option.
 | |
|    --sql-project=<name>      Use <name> as the project identifier for the
 | |
|                              current run.  Only valid with the --sql option.
 | |
|    --sql-style=<style>       Write SQL statements in the given style instead
 | |
|                              of the default SQLite format.  Styles include
 | |
|                              'Oracle' and 'Named_Columns'.
 | |
|    --sum-one                 For plain text reports, show the SUM: output line
 | |
|                              even if only one input file is processed.
 | |
|    --xml                     Write the results in XML.
 | |
|    --xsl=<file>              Reference <file> as an XSL stylesheet within
 | |
|                              the XML output.  If <file> is 1 (numeric one),
 | |
|                              writes a default stylesheet, cloc.xsl (or
 | |
|                              cloc-diff.xsl if --diff is also given).
 | |
|                              This switch forces --xml on.
 | |
|    --yaml                    Write the results in YAML.
 | |
| 
 | |
| ";
 | |
| #  Help information for options not yet implemented:
 | |
| #  --inline                  Process comments that appear at the end
 | |
| #                            of lines containing code.
 | |
| #  --html                    Create HTML files of each input file showing
 | |
| #                            comment and code lines in different colors.
 | |
| 
 | |
| $| = 1;  # flush STDOUT
 | |
| my $start_time = get_time();
 | |
| my (
 | |
|     $opt_categorized          ,
 | |
|     $opt_found                ,
 | |
|     @opt_force_lang           ,
 | |
|     $opt_lang_no_ext          ,
 | |
|     @opt_script_lang          ,
 | |
|     $opt_count_diff           ,
 | |
|     $opt_diff                 ,
 | |
|     $opt_diff_alignment       ,
 | |
|     $opt_diff_list_file       ,
 | |
|     $opt_diff_list_files      ,
 | |
|     $opt_diff_timeout         ,
 | |
|     $opt_timeout              ,
 | |
|     $opt_html                 ,
 | |
|     $opt_ignored              ,
 | |
|     $opt_counted              ,
 | |
|     $opt_show_ext             ,
 | |
|     $opt_show_lang            ,
 | |
|     $opt_progress_rate        ,
 | |
|     $opt_print_filter_stages  ,
 | |
|     $opt_v                    ,
 | |
|     $opt_vcs                  ,
 | |
|     $opt_version              ,
 | |
|     $opt_include_content      ,
 | |
|     $opt_exclude_content      ,
 | |
|     $opt_exclude_lang         ,
 | |
|     $opt_exclude_list_file    ,
 | |
|     $opt_exclude_dir          ,
 | |
|     $opt_explain              ,
 | |
|     $opt_include_ext          ,
 | |
|     $opt_include_lang         ,
 | |
|     $opt_force_lang_def       ,
 | |
|     $opt_read_lang_def        ,
 | |
|     $opt_write_lang_def       ,
 | |
|     $opt_write_lang_def_incl_dup,
 | |
|     $opt_strip_code           ,
 | |
|     $opt_strip_comments       ,
 | |
|     $opt_original_dir         ,
 | |
|     $opt_quiet                ,
 | |
|     $opt_report_file          ,
 | |
|     $opt_sdir                 ,
 | |
|     $opt_sum_reports          ,
 | |
|     $opt_hide_rate            ,
 | |
|     $opt_processes            ,
 | |
|     $opt_unicode              ,
 | |
|     $opt_no3                  ,   # accept it but don't use it
 | |
|     $opt_3                    ,
 | |
|     $opt_extract_with         ,
 | |
|     $opt_by_file              ,
 | |
|     $opt_by_file_by_lang      ,
 | |
|     $opt_by_percent           ,
 | |
|     $opt_xml                  ,
 | |
|     $opt_xsl                  ,
 | |
|     $opt_yaml                 ,
 | |
|     $opt_csv                  ,
 | |
|     $opt_csv_delimiter        ,
 | |
|     $opt_fullpath             ,
 | |
|     $opt_json                 ,
 | |
|     $opt_md                   ,
 | |
|     $opt_match_f              ,
 | |
|     @opt_not_match_f          ,
 | |
|     $opt_match_d              ,
 | |
|     @opt_not_match_d          ,
 | |
|     $opt_skip_uniqueness      ,
 | |
|     $opt_list_file            ,
 | |
|     $opt_help                 ,
 | |
|     $opt_skip_win_hidden      ,
 | |
|     $opt_read_binary_files    ,
 | |
|     $opt_sql                  ,
 | |
|     $opt_sql_append           ,
 | |
|     $opt_sql_project          ,
 | |
|     $opt_sql_style            ,
 | |
|     $opt_inline               ,
 | |
|     $opt_exclude_ext          ,
 | |
|     $opt_ignore_whitespace    ,
 | |
|     $opt_ignore_case          ,
 | |
|     $opt_ignore_case_ext      ,
 | |
|     $opt_follow_links         ,
 | |
|     $opt_autoconf             ,
 | |
|     $opt_sum_one              ,
 | |
|     $opt_stdin_name           ,
 | |
|     $opt_force_on_windows     ,
 | |
|     $opt_force_on_unix        ,   # actually forces !$ON_WINDOWS
 | |
|     $opt_show_os              ,
 | |
|     $opt_skip_archive         ,
 | |
|     $opt_max_file_size        ,   # in MB
 | |
|     $opt_use_sloccount        ,
 | |
|     $opt_no_autogen           ,
 | |
|     $opt_force_git            ,
 | |
|     $opt_git_diff_rel         ,
 | |
|     $opt_git_diff_all         ,
 | |
|     $opt_git_diff_simindex    ,
 | |
|     $opt_config_file          ,
 | |
|     $opt_strip_str_comments   ,
 | |
|     $opt_file_encoding        ,
 | |
|     $opt_docstring_as_code    ,
 | |
|     $opt_stat                 ,
 | |
|     $opt_summary_cutoff       ,
 | |
|     $opt_skip_leading         ,
 | |
|     $opt_no_recurse           ,
 | |
|     $opt_only_count_files     ,
 | |
|     $opt_fmt                  ,
 | |
|    );
 | |
| 
 | |
| my $getopt_success = GetOptions(             # {{{1
 | |
|    "by_file|by-file"                         => \$opt_by_file             ,
 | |
|    "by_file_by_lang|by-file-by-lang"         => \$opt_by_file_by_lang     ,
 | |
|    "categorized=s"                           => \$opt_categorized         ,
 | |
|    "counted=s"                               => \$opt_counted             ,
 | |
|    "include_ext|include-ext=s"               => \$opt_include_ext         ,
 | |
|    "include_lang|include-lang=s"             => \$opt_include_lang        ,
 | |
|    "include_content|include-content=s"       => \$opt_include_content     ,
 | |
|    "exclude_content|exclude-content=s"       => \$opt_exclude_content     ,
 | |
|    "exclude_lang|exclude-lang=s"             => \$opt_exclude_lang        ,
 | |
|    "exclude_dir|exclude-dir=s"               => \$opt_exclude_dir         ,
 | |
|    "exclude_list_file|exclude-list-file=s"   => \$opt_exclude_list_file   ,
 | |
|    "explain=s"                               => \$opt_explain             ,
 | |
|    "extract_with|extract-with=s"             => \$opt_extract_with        ,
 | |
|    "found=s"                                 => \$opt_found               ,
 | |
|    "count_and_diff|count-and-diff"           => \$opt_count_diff          ,
 | |
|    "diff"                                    => \$opt_diff                ,
 | |
|    "diff-alignment|diff_alignment=s"         => \$opt_diff_alignment      ,
 | |
|    "diff-timeout|diff_timeout=i"             => \$opt_diff_timeout        ,
 | |
|    "diff-list-file|diff_list_file=s"         => \$opt_diff_list_file      ,
 | |
|    "diff-list-files|diff_list_files"         => \$opt_diff_list_files     ,
 | |
|    "timeout=i"                               => \$opt_timeout             ,
 | |
|    "html"                                    => \$opt_html                ,
 | |
|    "ignored=s"                               => \$opt_ignored             ,
 | |
|    "quiet"                                   => \$opt_quiet               ,
 | |
|    "force_lang_def|force-lang-def=s"         => \$opt_force_lang_def      ,
 | |
|    "read_lang_def|read-lang-def=s"           => \$opt_read_lang_def       ,
 | |
|    "show_ext|show-ext:s"                     => \$opt_show_ext            ,
 | |
|    "show_lang|show-lang:s"                   => \$opt_show_lang           ,
 | |
|    "progress_rate|progress-rate=i"           => \$opt_progress_rate       ,
 | |
|    "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages ,
 | |
|    "report_file|report-file=s"               => \$opt_report_file         ,
 | |
|    "out=s"                                   => \$opt_report_file         ,
 | |
|    "script_lang|script-lang=s"               => \@opt_script_lang         ,
 | |
|    "sdir=s"                                  => \$opt_sdir                ,
 | |
|    "skip_uniqueness|skip-uniqueness"         => \$opt_skip_uniqueness     ,
 | |
|    "strip_code|strip-code=s"                 => \$opt_strip_code          ,
 | |
|    "strip_comments|strip-comments=s"         => \$opt_strip_comments      ,
 | |
|    "original_dir|original-dir"               => \$opt_original_dir        ,
 | |
|    "sum_reports|sum-reports"                 => \$opt_sum_reports         ,
 | |
|    "hide_rate|hide-rate"                     => \$opt_hide_rate           ,
 | |
|    "processes=n"                             => \$opt_processes           ,
 | |
|    "unicode"                                 => \$opt_unicode             ,
 | |
|    "no3"                                     => \$opt_no3                 ,  # ignored
 | |
|    "3"                                       => \$opt_3                   ,
 | |
|    "v|verbose:i"                             => \$opt_v                   ,
 | |
|    "vcs=s"                                   => \$opt_vcs                 ,
 | |
|    "version"                                 => \$opt_version             ,
 | |
|    "write_lang_def|write-lang-def=s"         => \$opt_write_lang_def      ,
 | |
|    "write_lang_def_incl_dup|write-lang-def-incl-dup=s" => \$opt_write_lang_def_incl_dup,
 | |
|    "xml"                                     => \$opt_xml                 ,
 | |
|    "xsl=s"                                   => \$opt_xsl                 ,
 | |
|    "force_lang|force-lang=s"                 => \@opt_force_lang          ,
 | |
|    "lang_no_ext|lang-no-ext=s"               => \$opt_lang_no_ext         ,
 | |
|    "yaml"                                    => \$opt_yaml                ,
 | |
|    "csv"                                     => \$opt_csv                 ,
 | |
|    "csv_delimiter|csv-delimiter=s"           => \$opt_csv_delimiter       ,
 | |
|    "json"                                    => \$opt_json                ,
 | |
|    "md"                                      => \$opt_md                  ,
 | |
|    "fullpath"                                => \$opt_fullpath            ,
 | |
|    "match_f|match-f=s"                       => \$opt_match_f             ,
 | |
|    "not_match_f|not-match-f=s"               => \@opt_not_match_f         ,
 | |
|    "match_d|match-d=s"                       => \$opt_match_d             ,
 | |
|    "not_match_d|not-match-d=s"               => \@opt_not_match_d         ,
 | |
|    "list_file|list-file=s"                   => \$opt_list_file           ,
 | |
|    "help"                                    => \$opt_help                ,
 | |
|    "skip_win_hidden|skip-win-hidden"         => \$opt_skip_win_hidden     ,
 | |
|    "read_binary_files|read-binary-files"     => \$opt_read_binary_files   ,
 | |
|    "sql=s"                                   => \$opt_sql                 ,
 | |
|    "sql_project|sql-project=s"               => \$opt_sql_project         ,
 | |
|    "sql_append|sql-append"                   => \$opt_sql_append          ,
 | |
|    "sql_style|sql-style=s"                   => \$opt_sql_style           ,
 | |
|    "inline"                                  => \$opt_inline              ,
 | |
|    "exclude_ext|exclude-ext=s"               => \$opt_exclude_ext         ,
 | |
|    "ignore_whitespace|ignore-whitespace"     => \$opt_ignore_whitespace   ,
 | |
|    "ignore_case|ignore-case"                 => \$opt_ignore_case         ,
 | |
|    "ignore_case_ext|ignore-case-ext"         => \$opt_ignore_case_ext     ,
 | |
|    "follow_links|follow-links"               => \$opt_follow_links        ,
 | |
|    "autoconf"                                => \$opt_autoconf            ,
 | |
|    "sum_one|sum-one"                         => \$opt_sum_one             ,
 | |
|    "by_percent|by-percent=s"                 => \$opt_by_percent          ,
 | |
|    "stdin_name|stdin-name=s"                 => \$opt_stdin_name          ,
 | |
|    "windows"                                 => \$opt_force_on_windows    ,
 | |
|    "unix"                                    => \$opt_force_on_unix       ,
 | |
|    "show_os|show-os"                         => \$opt_show_os             ,
 | |
|    "skip_archive|skip-archive=s"             => \$opt_skip_archive        ,
 | |
|    "max_file_size|max-file-size=f"           => \$opt_max_file_size       ,
 | |
|    "use_sloccount|use-sloccount"             => \$opt_use_sloccount       ,
 | |
|    "no_autogen|no-autogen"                   => \$opt_no_autogen          ,
 | |
|    "git"                                     => \$opt_force_git           ,
 | |
|    "git_diff_rel|git-diff-rel"               => \$opt_git_diff_rel        ,
 | |
|    "git_diff_all|git-diff-all"               => \$opt_git_diff_all        ,
 | |
| #  "git_diff_simindex|git-diff-simindex"     => \$opt_git_diff_simindex   ,
 | |
|    "config=s"                                => \$opt_config_file         ,
 | |
|    "strip_str_comments|strip-str-comments"   => \$opt_strip_str_comments  ,
 | |
|    "file_encoding|file-encoding=s"           => \$opt_file_encoding       ,
 | |
|    "docstring_as_code|docstring-as-code"     => \$opt_docstring_as_code   ,
 | |
|    "stat"                                    => \$opt_stat                ,
 | |
|    "summary_cutoff|summary-cutoff=s"         => \$opt_summary_cutoff      ,
 | |
|    "skip_leading|skip-leading:s"             => \$opt_skip_leading        ,
 | |
|    "no_recurse|no-recurse"                   => \$opt_no_recurse          ,
 | |
|    "only_count_files|only-count-files"       => \$opt_only_count_files    ,
 | |
|    "fmt=i"                                   => \$opt_fmt                 ,
 | |
|   );
 | |
| # 1}}}
 | |
| $config_file = $opt_config_file if defined $opt_config_file;
 | |
| load_from_config_file($config_file,          # {{{2
 | |
|                                                 \$opt_by_file             ,
 | |
|                                                 \$opt_by_file_by_lang     ,
 | |
|                                                 \$opt_categorized         ,
 | |
|                                                 \$opt_counted             ,
 | |
|                                                 \$opt_include_ext         ,
 | |
|                                                 \$opt_include_lang        ,
 | |
|                                                 \$opt_include_content     ,
 | |
|                                                 \$opt_exclude_content     ,
 | |
|                                                 \$opt_exclude_lang        ,
 | |
|                                                 \$opt_exclude_dir         ,
 | |
|                                                 \$opt_exclude_list_file   ,
 | |
|                                                 \$opt_explain             ,
 | |
|                                                 \$opt_extract_with        ,
 | |
|                                                 \$opt_found               ,
 | |
|                                                 \$opt_count_diff          ,
 | |
|                                                 \$opt_diff_list_files     ,
 | |
|                                                 \$opt_diff                ,
 | |
|                                                 \$opt_diff_alignment      ,
 | |
|                                                 \$opt_diff_timeout        ,
 | |
|                                                 \$opt_timeout             ,
 | |
|                                                 \$opt_html                ,
 | |
|                                                 \$opt_ignored             ,
 | |
|                                                 \$opt_quiet               ,
 | |
|                                                 \$opt_force_lang_def      ,
 | |
|                                                 \$opt_read_lang_def       ,
 | |
|                                                 \$opt_show_ext            ,
 | |
|                                                 \$opt_show_lang           ,
 | |
|                                                 \$opt_progress_rate       ,
 | |
|                                                 \$opt_print_filter_stages ,
 | |
|                                                 \$opt_report_file         ,
 | |
|                                                 \@opt_script_lang         ,
 | |
|                                                 \$opt_sdir                ,
 | |
|                                                 \$opt_skip_uniqueness     ,
 | |
|                                                 \$opt_strip_code          ,
 | |
|                                                 \$opt_strip_comments      ,
 | |
|                                                 \$opt_original_dir        ,
 | |
|                                                 \$opt_sum_reports         ,
 | |
|                                                 \$opt_hide_rate           ,
 | |
|                                                 \$opt_processes           ,
 | |
|                                                 \$opt_unicode             ,
 | |
|                                                 \$opt_3                   ,
 | |
|                                                 \$opt_v                   ,
 | |
|                                                 \$opt_vcs                 ,
 | |
|                                                 \$opt_version             ,
 | |
|                                                 \$opt_write_lang_def      ,
 | |
|                                                 \$opt_write_lang_def_incl_dup,
 | |
|                                                 \$opt_xml                 ,
 | |
|                                                 \$opt_xsl                 ,
 | |
|                                                 \@opt_force_lang          ,
 | |
|                                                 \$opt_lang_no_ext         ,
 | |
|                                                 \$opt_yaml                ,
 | |
|                                                 \$opt_csv                 ,
 | |
|                                                 \$opt_csv_delimiter       ,
 | |
|                                                 \$opt_json                ,
 | |
|                                                 \$opt_md                  ,
 | |
|                                                 \$opt_fullpath            ,
 | |
|                                                 \$opt_match_f             ,
 | |
|                                                 \@opt_not_match_f         ,
 | |
|                                                 \$opt_match_d             ,
 | |
|                                                 \@opt_not_match_d         ,
 | |
|                                                 \$opt_list_file           ,
 | |
|                                                 \$opt_help                ,
 | |
|                                                 \$opt_skip_win_hidden     ,
 | |
|                                                 \$opt_read_binary_files   ,
 | |
|                                                 \$opt_sql                 ,
 | |
|                                                 \$opt_sql_project         ,
 | |
|                                                 \$opt_sql_append          ,
 | |
|                                                 \$opt_sql_style           ,
 | |
|                                                 \$opt_inline              ,
 | |
|                                                 \$opt_exclude_ext         ,
 | |
|                                                 \$opt_ignore_whitespace   ,
 | |
|                                                 \$opt_ignore_case         ,
 | |
|                                                 \$opt_ignore_case_ext     ,
 | |
|                                                 \$opt_follow_links        ,
 | |
|                                                 \$opt_autoconf            ,
 | |
|                                                 \$opt_sum_one             ,
 | |
|                                                 \$opt_by_percent          ,
 | |
|                                                 \$opt_stdin_name          ,
 | |
|                                                 \$opt_force_on_windows    ,
 | |
|                                                 \$opt_force_on_unix       ,
 | |
|                                                 \$opt_show_os             ,
 | |
|                                                 \$opt_skip_archive        ,
 | |
|                                                 \$opt_max_file_size       ,
 | |
|                                                 \$opt_use_sloccount       ,
 | |
|                                                 \$opt_no_autogen          ,
 | |
|                                                 \$opt_force_git           ,
 | |
|                                                 \$opt_strip_str_comments  ,
 | |
|                                                 \$opt_file_encoding       ,
 | |
|                                                 \$opt_docstring_as_code   ,
 | |
|                                                 \$opt_stat                ,
 | |
| );  # 2}}} Not pretty.  Not at all.
 | |
| if ($opt_version) {
 | |
|     printf "$VERSION\n";
 | |
|     exit;
 | |
| }
 | |
| my $opt_git = 0;
 | |
| $opt_git = 1 if defined($opt_git_diff_all) or
 | |
|                 defined($opt_git_diff_rel) or
 | |
|                 (defined($opt_vcs) and ($opt_vcs eq "git"));
 | |
| $opt_by_file  = 1 if defined  $opt_by_file_by_lang;
 | |
| $opt_fmt = 0 unless defined $opt_fmt;
 | |
| if ($opt_fmt) {
 | |
|     $opt_by_file = 1;
 | |
|     $opt_json = 1;
 | |
| }
 | |
| my $CLOC_XSL = "cloc.xsl"; # created with --xsl
 | |
|    $CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
 | |
| die "\n" unless $getopt_success;
 | |
| print $usage and exit if $opt_help;
 | |
| my %Exclude_Language = ();
 | |
|    %Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
 | |
|         if $opt_exclude_lang;
 | |
| my %Exclude_Dir      = ();
 | |
|    %Exclude_Dir      = map { $_ => 1 } split(/,/, $opt_exclude_dir )
 | |
|         if $opt_exclude_dir ;
 | |
| die unless exclude_dir_validates(\%Exclude_Dir);
 | |
| my %Include_Ext = ();
 | |
|    %Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
 | |
|         if $opt_include_ext;
 | |
| my %Include_Language = (); # keys are lower case language names
 | |
|    %Include_Language = map { lc($_) => 1 } split(/,/, $opt_include_lang)
 | |
|         if $opt_include_lang;
 | |
| # Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories.  The contents of these
 | |
| # directories often conflict with files of interest.
 | |
| $opt_exclude_dir       = 1;
 | |
| $Exclude_Dir{".svn"}   = 1;
 | |
| $Exclude_Dir{".cvs"}   = 1;
 | |
| $Exclude_Dir{".hg"}    = 1;
 | |
| $Exclude_Dir{".git"}   = 1;
 | |
| $Exclude_Dir{".bzr"}   = 1;
 | |
| $Exclude_Dir{".snapshot"} = 1;  # NetApp backups
 | |
| $Exclude_Dir{".config"} = 1;
 | |
| $opt_count_diff        = defined $opt_count_diff ? 1 : 0;
 | |
| $opt_diff              = 1  if $opt_diff_alignment    or
 | |
|                                $opt_diff_list_file    or
 | |
|                                $opt_diff_list_files   or
 | |
|                                $opt_git_diff_rel      or
 | |
|                                $opt_git_diff_all      or
 | |
|                                $opt_git_diff_simindex;
 | |
| $opt_force_git         = 1  if $opt_git_diff_rel      or
 | |
|                                $opt_git_diff_all      or
 | |
|                                $opt_git_diff_simindex;
 | |
| $opt_diff_alignment    = 0  if $opt_diff_list_file;
 | |
| $opt_exclude_ext       = "" unless $opt_exclude_ext;
 | |
| $opt_ignore_whitespace = 0  unless $opt_ignore_whitespace;
 | |
| $opt_ignore_case       = 0  unless $opt_ignore_case;
 | |
| $opt_ignore_case_ext   = 0  unless $opt_ignore_case_ext;
 | |
| $opt_lang_no_ext       = 0  unless $opt_lang_no_ext;
 | |
| $opt_follow_links      = 0  unless $opt_follow_links;
 | |
| if (defined $opt_diff_timeout) {
 | |
|     # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
 | |
|     $opt_diff_timeout = 2**31-1 unless $opt_diff_timeout > 0;
 | |
| } else {
 | |
|     $opt_diff_timeout  =10; # seconds
 | |
| }
 | |
| if (defined $opt_timeout) {
 | |
|     # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
 | |
|     $opt_timeout = 2**31-1 unless $opt_timeout > 0;
 | |
|     # else is computed dynamically, ref $max_duration_sec
 | |
| }
 | |
| $opt_csv               = 0  unless defined $opt_csv;
 | |
| $opt_csv               = 1  if $opt_csv_delimiter;
 | |
| $ON_WINDOWS            = 1  if $opt_force_on_windows;
 | |
| $ON_WINDOWS            = 0  if $opt_force_on_unix;
 | |
| $opt_max_file_size     = 100 unless $opt_max_file_size;
 | |
| my $HAVE_SLOCCOUNT_c_count = 0;
 | |
| if (!$ON_WINDOWS and $opt_use_sloccount) {
 | |
|     # Only bother doing this kludgey test is user explicitly wants
 | |
|     # to use SLOCCount.  Debian based systems will hang if just doing
 | |
|     #  external_utility_exists("c_count")
 | |
|     # if c_count is in $PATH; c_count expects to have input.
 | |
|     $HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
 | |
| }
 | |
| if ($opt_use_sloccount) {
 | |
|     if (!$HAVE_SLOCCOUNT_c_count) {
 | |
|         warn "c_count could not be found; ignoring --use-sloccount\n";
 | |
|         $opt_use_sloccount = 0;
 | |
|     } else {
 | |
|         warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
 | |
|         warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
 | |
|         warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
 | |
|         warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
 | |
|         warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
 | |
|         warn "--strip-code is disabled with --use-sloccount\n" if $opt_strip_code;
 | |
|         $opt_diff           = 0;
 | |
|         $opt_count_diff     = undef;
 | |
|         $opt_unicode        = 0;
 | |
|         $opt_strip_comments = 0;
 | |
|         $opt_strip_code     = 0;
 | |
|     }
 | |
| }
 | |
| die "--strip-comments and --strip-code are mutually exclusive\n" if
 | |
|     $opt_strip_comments and $opt_strip_code;
 | |
| $opt_vcs = 0 if $opt_force_git;
 | |
| 
 | |
| # replace Windows path separators with /
 | |
| if ($ON_WINDOWS) {
 | |
|     map { s{\\}{/}g } @ARGV;
 | |
|     if ($opt_git) {
 | |
|         # PowerShell tab expansion automatically prefixes local directories
 | |
|         # with ".\" (now mapped to "./").   git ls-files output does not
 | |
|         # include this.  Strip this prefix to permit clean matches.
 | |
|         map { s{^\./}{} } @ARGV;
 | |
|     }
 | |
| }
 | |
| 
 | |
| my @COUNT_DIFF_ARGV        = undef;
 | |
| my $COUNT_DIFF_report_file = undef;
 | |
| if ($opt_count_diff and !$opt_diff_list_file) {
 | |
|     die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
 | |
|         if scalar @ARGV != 2;
 | |
|     # prefix with a dummy term so that $opt_count_diff is the
 | |
|     # index into @COUNT_DIFF_ARGV to work on at each pass
 | |
|     @COUNT_DIFF_ARGV = (undef, $ARGV[0],
 | |
|                                $ARGV[1],
 | |
|                               [$ARGV[0], $ARGV[1]]);  # 3rd pass: diff them
 | |
|     $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
 | |
| }
 | |
| 
 | |
| # Options defaults:
 | |
| $opt_quiet         =   1 if ($opt_md or $opt_json or !(-t STDOUT))
 | |
|                             and !defined $opt_report_file;
 | |
| $opt_progress_rate = 100 unless defined $opt_progress_rate;
 | |
| $opt_progress_rate =   0 if     defined $opt_quiet;
 | |
| if (!defined $opt_v) {
 | |
|     $opt_v  = 0;
 | |
| } elsif (!$opt_v) {
 | |
|     $opt_v  = 1;
 | |
| }
 | |
| if (defined $opt_xsl) {
 | |
|     $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
 | |
|     $opt_xml = 1;
 | |
| }
 | |
| my $skip_generate_report = 0;
 | |
| $opt_sql_style = 0 unless defined $opt_sql_style;
 | |
| $opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
 | |
| if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
 | |
|     $opt_quiet            = 1;
 | |
|     $skip_generate_report = 1;
 | |
|     $opt_by_file          = 1;
 | |
|     $opt_sum_reports      = 0;
 | |
|     $opt_progress_rate    = 0;
 | |
| } elsif ($opt_sql)  { # write SQL output to a file
 | |
|     $opt_by_file          = 1;
 | |
|     $skip_generate_report = 1;
 | |
|     $opt_sum_reports      = 0;
 | |
| }
 | |
| if ($opt_sql_style) {
 | |
|     $opt_sql_style = lc $opt_sql_style;
 | |
|     if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle Named_Columns )) {
 | |
|         die "'$opt_sql_style' is not a recognized SQL style.\n";
 | |
|     }
 | |
| }
 | |
| $opt_by_percent = '' unless defined $opt_by_percent;
 | |
| if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
 | |
|     die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
 | |
| }
 | |
| $opt_by_percent = lc $opt_by_percent;
 | |
| 
 | |
| if (defined $opt_vcs) {
 | |
|     if ($opt_vcs eq "auto") {
 | |
|         if      (is_dir(".git")) {
 | |
|             $opt_vcs = "git";
 | |
|         } elsif (is_dir(".svn")) {
 | |
|             $opt_vcs = "svn";
 | |
|         } else {
 | |
|             warn "--vcs auto:  unable to determine versioning system\n";
 | |
|         }
 | |
|     }
 | |
|     if      ($opt_vcs eq "git") {
 | |
|         $opt_vcs = "git ls-files";
 | |
|         my @submodules = invoke_generator('git submodule status', \@ARGV);
 | |
|         foreach my $SM (@submodules) {
 | |
|             $SM =~ s/^\s+//;        # may have leading space
 | |
|             $SM =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
 | |
|             my ($checksum, $dir) = split(' ', $SM, 2);
 | |
|             $dir =~ s/\s+$//;
 | |
|             $Exclude_Dir{$dir} = 1;
 | |
|         }
 | |
|     } elsif ($opt_vcs eq "svn") {
 | |
|         $opt_vcs = "svn list -R";
 | |
|     }
 | |
| }
 | |
| 
 | |
| my $list_no_autogen = 0;
 | |
| if (defined $opt_no_autogen and scalar @ARGV == 1 and $ARGV[0] eq "list") {
 | |
|     $list_no_autogen = 1;
 | |
| }
 | |
| if ($opt_summary_cutoff) {
 | |
|     my $error = summary_cutoff_error($opt_summary_cutoff);
 | |
|     die "$error\n" if $error;
 | |
| }
 | |
| 
 | |
| if (!$opt_config_file) {
 | |
|     # if not explicitly given, look for a config file in other
 | |
|     # possible locations
 | |
|     my $other_loc = check_alternate_config_files($opt_list_file,
 | |
|         $opt_exclude_list_file, $opt_read_lang_def, $opt_force_lang_def,
 | |
|         $opt_diff_list_file);
 | |
|     $opt_config_file = $other_loc if $other_loc;
 | |
| }
 | |
| 
 | |
| # --match-d and --not-match-d: if end with a trailing slash, update the
 | |
| # regex to be either slash or end of line since File::Find::find() will
 | |
| # not see the trailing slash in leaf directories (#732, #833).
 | |
| if ($opt_match_d and $opt_match_d =~ m{/$}) {
 | |
|     $opt_match_d =~ s{/$}{(/|\$)};
 | |
| }
 | |
| foreach my $nmd (@opt_not_match_d) {
 | |
|     if ($nmd =~ m{/$}) {
 | |
|         $nmd =~ s{/$}{(/|\$)};
 | |
|     }
 | |
| }
 | |
| 
 | |
| die $brief_usage unless defined $opt_version         or
 | |
|                         defined $opt_show_lang       or
 | |
|                         defined $opt_show_ext        or
 | |
|                         defined $opt_show_os         or
 | |
|                         defined $opt_write_lang_def  or
 | |
|                         defined $opt_write_lang_def_incl_dup  or
 | |
|                         defined $opt_list_file       or
 | |
|                         defined $opt_diff_list_file  or
 | |
|                         defined $opt_vcs             or
 | |
|                         defined $opt_xsl             or
 | |
|                         defined $opt_explain         or
 | |
|                         $list_no_autogen             or
 | |
|                         scalar @ARGV >= 1;
 | |
| if (!$opt_diff_list_file) {
 | |
|     die "--diff requires two arguments; got ", scalar @ARGV, "\n"
 | |
|         if $opt_diff and !$opt_sum_reports and scalar @ARGV != 2;
 | |
|     die "--diff arguments are identical; nothing done", "\n"
 | |
|         if $opt_diff and !$opt_sum_reports and scalar @ARGV == 2
 | |
|                                            and $ARGV[0] eq $ARGV[1];
 | |
| }
 | |
| trick_pp_packer_encode() if $ON_WINDOWS and $opt_file_encoding;
 | |
| $File::Find::dont_use_nlink = 1 if $opt_stat or top_level_SMB_dir(\@ARGV);
 | |
| my @git_similarity = (); # only populated with --git-diff-simindex
 | |
| my %git_metadata   = ();
 | |
| get_git_metadata(\@ARGV, \%git_metadata) if $opt_force_git;
 | |
| #use Data::Dumper;
 | |
| #print Dumper(\%git_metadata);
 | |
| replace_git_hash_with_tarfile(\@ARGV, \@git_similarity);
 | |
| # 1}}}
 | |
| # Step 1:  Initialize global constants.        {{{1
 | |
| #
 | |
| my $nFiles_Found = 0;  # updated in make_file_list
 | |
| my (%Language_by_Extension, %Language_by_Script,
 | |
|     %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
 | |
|     %Language_by_File, %Scale_Factor, %Known_Binary_Archives,
 | |
|     %Language_by_Prefix, %EOL_Continuation_re,
 | |
|    );
 | |
| my $ALREADY_SHOWED_HEADER = 0;
 | |
| my $ALREADY_SHOWED_XML_SECTION = 0;
 | |
| my %Error_Codes = ( 'Unable to read'                => -1,
 | |
|                     'Neither file nor directory'    => -2,
 | |
|                     'Diff error (quoted comments?)' => -3,
 | |
|                     'Diff error, exceeded timeout'  => -4,
 | |
|                     'Line count, exceeded timeout'  => -5,
 | |
|                   );
 | |
| my %Extension_Collision = (
 | |
|     'ADSO/IDSM'                                     => [ 'adso' ] ,
 | |
|     'C#/Smalltalk'                                  => [ 'cs'   ] ,
 | |
|     'D/dtrace'                                      => [ 'd'    ] ,
 | |
|     'F#/Forth'                                      => [ 'fs'   ] ,
 | |
|     'Fortran 77/Forth'                              => [ 'f', 'for' ] ,
 | |
|     'IDL/Qt Project/Prolog/ProGuard'                => [ 'pro'  ] ,
 | |
|     'Lisp/Julia'                                    => [ 'jl'   ] ,
 | |
|     'Lisp/OpenCL'                                   => [ 'cl'   ] ,
 | |
|     'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'  => [ 'm'    ] ,
 | |
|     'Pascal/Puppet'                                 => [ 'pp'   ] ,
 | |
|     'Perl/Prolog'                                   => [ 'pl', 'PL'  ] ,
 | |
|     'PHP/Pascal/Fortran'                            => [ 'inc'  ] ,
 | |
|     'Raku/Prolog'                                   => [ 'p6', 'P6'  ] ,
 | |
|     'Qt/Glade'                                      => [ 'ui'   ] ,
 | |
|     'TypeScript/Qt Linguist'                        => [ 'ts'   ] ,
 | |
|     'Verilog-SystemVerilog/Coq'                     => [ 'v'    ] ,
 | |
|     'Visual Basic/TeX/Apex Class'                   => [ 'cls'  ] ,
 | |
|     'Scheme/SaltStack'                              => [ 'sls'  ] ,
 | |
| );
 | |
| my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
 | |
| if ($opt_force_lang_def) {
 | |
|     # replace cloc's definitions
 | |
|     read_lang_def(
 | |
|         $opt_force_lang_def    , #        Sample values:
 | |
|         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | |
|         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | |
|         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | |
|         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | |
|                                  #      [ 'remove_matches' , '^\s*#'  ]
 | |
|         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | |
|         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | |
|         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | |
|         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | |
|         );
 | |
| } else {
 | |
|     set_constants(               #
 | |
|         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | |
|         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | |
|         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | |
|         \%Language_by_Prefix   , # Language_by_Prefix{Dockerfile}  = 'Dockerfile'
 | |
|         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | |
|                                  #      [ 'remove_matches' , '^\s*#'  ]
 | |
|         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | |
|         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | |
|         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | |
|         \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
 | |
|         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | |
|         );
 | |
|         if ($opt_no_autogen) {
 | |
|             foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
 | |
|         }
 | |
| }
 | |
| if ($opt_read_lang_def) {
 | |
|     # augment cloc's definitions (keep cloc's where there are overlaps)
 | |
|     merge_lang_def(
 | |
|         $opt_read_lang_def     , #        Sample values:
 | |
|         \%Language_by_Extension, # Language_by_Extension{f}    = 'Fortran 77'
 | |
|         \%Language_by_Script   , # Language_by_Script{sh}      = 'Bourne Shell'
 | |
|         \%Language_by_File     , # Language_by_File{makefile}  = 'make'
 | |
|         \%Filters_by_Language  , # Filters_by_Language{Bourne Shell}[0] =
 | |
|                                  #      [ 'remove_matches' , '^\s*#'  ]
 | |
|         \%Not_Code_Extension   , # Not_Code_Extension{jpg}     = 1
 | |
|         \%Not_Code_Filename    , # Not_Code_Filename{README}   = 1
 | |
|         \%Scale_Factor         , # Scale_Factor{Perl}          = 4.0
 | |
|         \%EOL_Continuation_re  , # EOL_Continuation_re{C++}    = '\\$'
 | |
|         );
 | |
| }
 | |
| if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
 | |
|     die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
 | |
| }
 | |
| check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
 | |
|                       \%Scale_Factor);
 | |
| 
 | |
| my $nCounted = 0;
 | |
| 
 | |
| # Process command line provided extension-to-language mapping overrides.
 | |
| # Make a hash of known languages in lower case for easier matching.
 | |
| my %Recognized_Language_lc = (); # key = language name in lc, value = true name
 | |
| foreach my $language (keys %Filters_by_Language) {
 | |
|     my $lang_lc = lc $language;
 | |
|     $Recognized_Language_lc{$lang_lc} = $language;
 | |
| }
 | |
| my %Forced_Extension = (); # file name extensions which user wants to count
 | |
| my $All_One_Language = 0;  # set to !0 if --force-lang's <ext> is missing
 | |
| foreach my $pair (@opt_force_lang) {
 | |
|     my ($lang, $extension) = split(',', $pair);
 | |
|     my $lang_lc = lc $lang;
 | |
|     if (defined $extension) {
 | |
|         $Forced_Extension{$extension} = $lang;
 | |
| 
 | |
|         die_unknown_lang($lang, "--force-lang")
 | |
|             unless $Recognized_Language_lc{$lang_lc};
 | |
| 
 | |
|         $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
 | |
|     } else {
 | |
|         # the scary case--count everything as this language
 | |
|         $All_One_Language = $Recognized_Language_lc{$lang_lc};
 | |
|     }
 | |
| }
 | |
| 
 | |
| foreach my $pair (@opt_script_lang) {
 | |
|     my ($lang, $script_name) = split(',', $pair);
 | |
|     my $lang_lc = lc $lang;
 | |
|     if (!defined $script_name) {
 | |
|         die "The --script-lang option requires a comma separated pair of ".
 | |
|             "strings.\n";
 | |
|     }
 | |
| 
 | |
|     die_unknown_lang($lang, "--script-lang")
 | |
|         unless $Recognized_Language_lc{$lang_lc};
 | |
| 
 | |
|     $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
 | |
| }
 | |
| 
 | |
| # If user provided a language definition file, make sure those
 | |
| # extensions aren't rejected.
 | |
| foreach my $ext (%Language_by_Extension) {
 | |
|     next unless defined $Not_Code_Extension{$ext};
 | |
|     delete $Not_Code_Extension{$ext};
 | |
| }
 | |
| 
 | |
| # If user provided file extensions to ignore, add these to
 | |
| # the exclusion list.
 | |
| foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) {
 | |
|     $ext = lc $ext if $ON_WINDOWS or $opt_ignore_case_ext;
 | |
|     $Not_Code_Extension{$ext} = 1;
 | |
| }
 | |
| 
 | |
| # If SQL or --by-file output is requested, keep track of directory names
 | |
| # generated by File::Temp::tempdir and used to temporarily hold the results
 | |
| # of compressed archives.  Contents of the SQL table 't' will be much
 | |
| # cleaner if these meaningless directory names are stripped from the front
 | |
| # of files pulled from the archives.
 | |
| my %TEMP_DIR = ();
 | |
| my $TEMP_OFF =  0;  # Needed for --sdir; keep track of the number of
 | |
|                     # scratch directories made in this run to avoid
 | |
|                     # file overwrites by multiple extractions to same
 | |
|                     # sdir.
 | |
| # Also track locations where temporary installations, if necessary, of
 | |
| # Algorithm::Diff and/or Regexp::Common are done.  Make sure these
 | |
| # directories are not counted as inputs (ref bug #80 2012-11-23).
 | |
| my %TEMP_INST = ();
 | |
| 
 | |
| # invert %Language_by_Script hash to get an easy-to-look-up list of known
 | |
| # scripting languages
 | |
| my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
 | |
| # 1}}}
 | |
| # Step 2:  Early exits for display, summation. {{{1
 | |
| #
 | |
| print_extension_info(   $opt_show_ext     ) if defined $opt_show_ext ;
 | |
| print_language_info(    $opt_show_lang, '') if defined $opt_show_lang;
 | |
| print_language_filters( $opt_explain      ) if defined $opt_explain  ;
 | |
| exit if (defined $opt_show_ext)  or
 | |
|         (defined $opt_show_lang) or
 | |
|         (defined $opt_explain)   or
 | |
|         $list_no_autogen;
 | |
| 
 | |
| Top_of_Processing_Loop:
 | |
| # Sorry, coding purists.  Using a goto to implement --count-and-diff
 | |
| # which has to do three passes over the main code, starting with
 | |
| # a clean slate each time.
 | |
| if ($opt_count_diff) {
 | |
|     @ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
 | |
|     if ($opt_count_diff == 3) {
 | |
|         $opt_diff = 1;
 | |
|         @ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
 | |
|     } elsif ($opt_diff_list_files) {
 | |
|         $opt_diff = 0;
 | |
|     }
 | |
|     if ($opt_report_file) {
 | |
|         # Instead of just one output file, will have three.
 | |
|         # Keep their names unique otherwise results are clobbered.
 | |
|         # Replace file path separators with underscores otherwise
 | |
|         # may end up with illegal file names.
 | |
|         my ($fn_0, $fn_1) = (undef, undef);
 | |
|         if ($ON_WINDOWS) {
 | |
|             ($fn_0 = $ARGV[0]) =~ s{\\}{_}g;
 | |
|              $fn_0 =~ s{:}{_}g;
 | |
|              $fn_0 =~ s{/}{_}g;
 | |
|             ($fn_1 = $ARGV[1]) =~ s{\\}{_}g if defined $ARGV[1];
 | |
|              $fn_1 =~ s{:}{_}g              if defined $ARGV[1];
 | |
|              $fn_1 =~ s{/}{_}g              if defined $ARGV[1];
 | |
|         } else {
 | |
|             ($fn_0 = $ARGV[0]) =~ s{/}{_}g;
 | |
|             ($fn_1 = $ARGV[1]) =~ s{/}{_}g  if defined $ARGV[1];
 | |
|         }
 | |
| 
 | |
|         if      ($opt_count_diff == 3) {
 | |
|             $opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
 | |
|         } else {
 | |
|             $opt_report_file = $COUNT_DIFF_report_file . ".$fn_0";
 | |
|         }
 | |
|     } else {
 | |
|         # STDOUT; print a header showing what it's working on
 | |
|         if ($opt_count_diff == 3) {
 | |
|             print "\ndiff $ARGV[0] $ARGV[1]::\n";
 | |
|         } else {
 | |
|             print "\n" if $opt_count_diff > 1;
 | |
|             print "$ARGV[0]::\n";
 | |
|         }
 | |
|     }
 | |
|     $ALREADY_SHOWED_HEADER      = 0;
 | |
|     $ALREADY_SHOWED_XML_SECTION = 0;
 | |
| }
 | |
| 
 | |
| #print "Before glob have [", join(",", @ARGV), "]\n";
 | |
| @ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
 | |
| #print "after  glob have [", join(",", @ARGV), "]\n";
 | |
| 
 | |
| # filter out archive files if requested to do so
 | |
| if (defined $opt_skip_archive) {
 | |
|     my @non_archive = ();
 | |
|     foreach my $candidate (@ARGV) {
 | |
|         if ($candidate !~ m/${opt_skip_archive}$/) {
 | |
|             push @non_archive, $candidate;
 | |
| 
 | |
|         }
 | |
|     }
 | |
|     @ARGV = @non_archive;
 | |
| }
 | |
| 
 | |
| if ($opt_sum_reports and $opt_diff) {
 | |
|     my @results = ();
 | |
|     if ($opt_csv and !defined($opt_csv_delimiter)) {
 | |
|         $opt_csv_delimiter = ",";
 | |
|     }
 | |
|     if ($opt_list_file) { # read inputs from the list file
 | |
|         my @list = read_list_file($opt_list_file);
 | |
|         if ($opt_csv) {
 | |
|             @results = combine_csv_diffs($opt_csv_delimiter, \@list);
 | |
|         } else {
 | |
|             @results = combine_diffs(\@list);
 | |
|         }
 | |
|     } elsif ($opt_vcs) { # read inputs from the VCS generator
 | |
|         my @list = invoke_generator($opt_vcs, \@ARGV);
 | |
|         if ($opt_csv) {
 | |
|             @results = combine_csv_diffs($opt_csv_delimiter, \@list);
 | |
|         } else {
 | |
|             @results = combine_diffs(\@list);
 | |
|         }
 | |
|     } else { # get inputs from the command line
 | |
|         if ($opt_csv) {
 | |
|             @results = combine_csv_diffs($opt_csv_delimiter, \@ARGV);
 | |
|         } else {
 | |
|             @results = combine_diffs(\@ARGV);
 | |
|         }
 | |
|     }
 | |
|     if ($opt_report_file) {
 | |
|         write_file($opt_report_file, {}, @results);
 | |
|     } else {
 | |
|         print "\n", join("\n", @results), "\n";
 | |
|     }
 | |
|     exit;
 | |
| }
 | |
| if ($opt_sum_reports) {
 | |
|     my %Results = ();
 | |
|     foreach my $type( "by language", "by report file" ) {
 | |
|         my $found_lang = undef;
 | |
|         if ($opt_list_file or $opt_vcs) {
 | |
|             # read inputs from the list file
 | |
|             my @list;
 | |
|             if ($opt_vcs) {
 | |
|                 @list = invoke_generator($opt_vcs, \@ARGV);
 | |
|             } else {
 | |
|                 @list = read_list_file($opt_list_file);
 | |
|             }
 | |
|             $found_lang = combine_results(\@list,
 | |
|                                            $type,
 | |
|                                           \%{$Results{ $type }},
 | |
|                                           \%Filters_by_Language );
 | |
|         } else { # get inputs from the command line
 | |
|             $found_lang = combine_results(\@ARGV,
 | |
|                                            $type,
 | |
|                                           \%{$Results{ $type }},
 | |
|                                           \%Filters_by_Language );
 | |
|         }
 | |
|         next unless %Results;
 | |
|         my $end_time = get_time();
 | |
|         my @results  = generate_report($VERSION, $end_time - $start_time,
 | |
|                                        $type,
 | |
|                                       \%{$Results{ $type }}, \%Scale_Factor);
 | |
|         if ($opt_report_file) {
 | |
|             my $ext  = ".lang";
 | |
|                $ext  = ".file" unless $type eq "by language";
 | |
|             next if !$found_lang and  $ext  eq ".lang";
 | |
|             write_file($opt_report_file . $ext, {}, @results);
 | |
|         } else {
 | |
|             print "\n", join("\n", @results), "\n";
 | |
|         }
 | |
|     }
 | |
|     exit;
 | |
| }
 | |
| if ($opt_write_lang_def or $opt_write_lang_def_incl_dup) {
 | |
|     my $file = $opt_write_lang_def          if $opt_write_lang_def;
 | |
|        $file = $opt_write_lang_def_incl_dup if $opt_write_lang_def_incl_dup;
 | |
|     write_lang_def($file                 ,
 | |
|                   \%Language_by_Extension,
 | |
|                   \%Language_by_Script   ,
 | |
|                   \%Language_by_File     ,
 | |
|                   \%Filters_by_Language  ,
 | |
|                   \%Not_Code_Extension   ,
 | |
|                   \%Not_Code_Filename    ,
 | |
|                   \%Scale_Factor         ,
 | |
|                   \%EOL_Continuation_re  ,
 | |
|                   );
 | |
|     exit;
 | |
| }
 | |
| if ($opt_show_os) {
 | |
|     if ($ON_WINDOWS) {
 | |
|         print "Windows\n";
 | |
|     } else {
 | |
|         print "UNIX\n";
 | |
|     }
 | |
|     exit;
 | |
| }
 | |
| 
 | |
| my $max_processes = get_max_processes();
 | |
| 
 | |
| # 1}}}
 | |
| # Step 3:  Create a list of files to consider. {{{1
 | |
| #  a) If inputs are binary archives, first cd to a temp
 | |
| #     directory, expand the archive with the user-given
 | |
| #     extraction tool, then add the temp directory to
 | |
| #     the list of dirs to process.
 | |
| #  b) Create a list of every file that might contain source
 | |
| #     code.  Ignore binary files, zero-sized files, and
 | |
| #     any file in a directory the user says to exclude.
 | |
| #  c) Determine the language for each file in the list.
 | |
| #
 | |
| my @binary_archive = ();
 | |
| my $cwd            = cwd();
 | |
| if ($opt_extract_with) {
 | |
| #print "cwd main = [$cwd]\n";
 | |
|     my @extract_location = ();
 | |
|     foreach my $bin_file (@ARGV) {
 | |
|         my $extract_dir = undef;
 | |
|         if ($opt_sdir) {
 | |
|             ++$TEMP_OFF;
 | |
|             $extract_dir = "$opt_sdir/$TEMP_OFF";
 | |
|             File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | |
|             File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | |
|         } else {
 | |
|             $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | |
|         }
 | |
|         $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | |
|         print "mkdir $extract_dir\n"  if $opt_v;
 | |
|         print "cd    $extract_dir\n"  if $opt_v;
 | |
|         chdir $extract_dir;
 | |
|         my $bin_file_full_path = "";
 | |
|         if (File::Spec->file_name_is_absolute( $bin_file )) {
 | |
|             $bin_file_full_path = $bin_file;
 | |
| #print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
 | |
|         } else {
 | |
|             $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
 | |
| #print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
 | |
|         }
 | |
|         my     $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
 | |
|         print  $extract_cmd, "\n" if $opt_v;
 | |
|         system $extract_cmd;
 | |
|         push @extract_location, $extract_dir;
 | |
|         chdir $cwd;
 | |
|     }
 | |
|     # It is possible that the binary archive itself contains additional
 | |
|     # files compressed the same way (true for Java .ear files).  Go
 | |
|     # through all the files that were extracted, see if they are binary
 | |
|     # archives and try to extract them.  Lather, rinse, repeat.
 | |
|     my $binary_archives_exist = 1;
 | |
|     my $count_binary_archives = 0;
 | |
|     my $previous_count        = 0;
 | |
|     my $n_pass                = 0;
 | |
|     while ($binary_archives_exist) {
 | |
|         @binary_archive = ();
 | |
|         foreach my $dir (@extract_location) {
 | |
|             find(\&archive_files, $dir);  # populates global @binary_archive
 | |
|         }
 | |
|         foreach my $archive (@binary_archive) {
 | |
|             my $extract_dir = undef;
 | |
|             if ($opt_sdir) {
 | |
|                 ++$TEMP_OFF;
 | |
|                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 | |
|                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | |
|                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | |
|             } else {
 | |
|                 $extract_dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | |
|             }
 | |
|             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | |
|             print "mkdir $extract_dir\n"  if $opt_v;
 | |
|             print "cd    $extract_dir\n"  if $opt_v;
 | |
|             chdir  $extract_dir;
 | |
| 
 | |
|             my     $extract_cmd = uncompress_archive_cmd($archive);
 | |
|             print  $extract_cmd, "\n" if $opt_v;
 | |
|             system $extract_cmd;
 | |
|             push @extract_location, $extract_dir;
 | |
|             unlink $archive;  # otherwise will be extracting it forever
 | |
|         }
 | |
|         $count_binary_archives = scalar @binary_archive;
 | |
|         if ($count_binary_archives == $previous_count) {
 | |
|             $binary_archives_exist = 0;
 | |
|         }
 | |
|         $previous_count = $count_binary_archives;
 | |
|     }
 | |
|     chdir $cwd;
 | |
| 
 | |
|     @ARGV = @extract_location;
 | |
| } else {
 | |
|     # see if any of the inputs need to be auto-uncompressed &/or expanded
 | |
|     my @updated_ARGS = ();
 | |
|     replace_git_hash_with_tarfile(\@ARGV, \@git_similarity) if $opt_force_git;
 | |
|     foreach my $Arg (@ARGV) {
 | |
|         if (is_dir($Arg)) {
 | |
|             push @updated_ARGS, $Arg;
 | |
|             next;
 | |
|         }
 | |
|         my $full_path = "";
 | |
|         if (File::Spec->file_name_is_absolute( $Arg )) {
 | |
|             $full_path = $Arg;
 | |
|         } else {
 | |
|             $full_path = File::Spec->catfile( $cwd, $Arg );
 | |
|         }
 | |
| #print "full_path = [$full_path]\n";
 | |
|         my $extract_cmd = uncompress_archive_cmd($full_path);
 | |
|         if ($extract_cmd) {
 | |
|             my $extract_dir = undef;
 | |
|             if ($opt_sdir) {
 | |
|                 ++$TEMP_OFF;
 | |
|                 $extract_dir = "$opt_sdir/$TEMP_OFF";
 | |
|                 File::Path::rmtree($extract_dir) if     is_dir($extract_dir);
 | |
|                 File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
 | |
|             } else {
 | |
|                 $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
 | |
|             }
 | |
|             $TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
 | |
|             print "mkdir $extract_dir\n"  if $opt_v;
 | |
|             print "cd    $extract_dir\n"  if $opt_v;
 | |
|             chdir  $extract_dir;
 | |
|             print  $extract_cmd, "\n" if $opt_v;
 | |
|             system $extract_cmd;
 | |
|             push @updated_ARGS, $extract_dir;
 | |
|             chdir $cwd;
 | |
|         } else {
 | |
|             # this is a conventional, uncompressed, unarchived file
 | |
|             # or a directory; keep as-is
 | |
|             push @updated_ARGS, $Arg;
 | |
|         }
 | |
|     }
 | |
|     @ARGV = @updated_ARGS;
 | |
| 
 | |
|     # make sure we're not counting any directory containing
 | |
|     # temporary installations of Regexp::Common, Algorithm::Diff
 | |
|     foreach my $d (sort keys %TEMP_INST) {
 | |
|         foreach my $a (@ARGV) {
 | |
|             next unless is_dir($a);
 | |
|             if ($opt_v > 2) {
 | |
|                 printf "Comparing %s (location of %s) to input [%s]\n",
 | |
|                         $d, $TEMP_INST{$d}, $a;
 | |
|             }
 | |
|             if ($a eq $d) {
 | |
|                 die "File::Temp::tempdir chose directory ",
 | |
|                     $d, " to install ", $TEMP_INST{$d}, " but this ",
 | |
|                     "matches one of your input directories.  Rerun ",
 | |
|                     "with --sdir and supply a different temporary ",
 | |
|                     "directory for ", $TEMP_INST{$d}, "\n";
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| # 1}}}
 | |
| my @Errors    = ();
 | |
| my @file_list = ();  # global variable updated in files()
 | |
| my %Ignored   = ();  # files that are not counted (language not recognized or
 | |
|                      # problems reading the file)
 | |
| my @Lines_Out = ();
 | |
| my %upper_lower_map = ();  # global variable (needed only on Windows) to
 | |
|                            # track case of original filename, populated in
 | |
|                            # make_file_list() if $ON_WINDOWS
 | |
| if ($opt_diff) {
 | |
| # Step 4:  Separate code from non-code files.  {{{1
 | |
| my @fh            = ();
 | |
| my @files_for_set = ();
 | |
| my @files_added_tot = ();
 | |
| my @files_removed_tot = ();
 | |
| my @file_pairs_tot = ();
 | |
| # make file lists for each separate argument
 | |
| if ($opt_diff_list_file) {
 | |
|     @files_for_set = ( (), () );
 | |
|     file_pairs_from_file($opt_diff_list_file, # in
 | |
|                         \@files_added_tot   , # out
 | |
|                         \@files_removed_tot , # out
 | |
|                         \@file_pairs_tot    , # out
 | |
|                        );
 | |
|     foreach my $F (@files_added_tot) {
 | |
|         if ($ON_WINDOWS) {
 | |
|             (my $lc = lc $F) =~ s{\\}{/}g;
 | |
|             $upper_lower_map{$lc} = $F;
 | |
|             $F = $lc;
 | |
|         }
 | |
|         push @{$files_for_set[1]}, $F;
 | |
|     }
 | |
|     foreach my $F (@files_removed_tot) {
 | |
|         if ($ON_WINDOWS) {
 | |
|             (my $lc = lc $F) =~ s{\\}{/}g;
 | |
|             $upper_lower_map{$lc} = $F;
 | |
|             $F = $lc;
 | |
|         }
 | |
|         push @{$files_for_set[0]}, $F;
 | |
|     }
 | |
|     foreach my $pair (@file_pairs_tot) {
 | |
|         if ($ON_WINDOWS) {
 | |
|             push @{$files_for_set[0]}, lc $pair->[0];
 | |
|             push @{$files_for_set[1]}, lc $pair->[1];
 | |
|         } else {
 | |
|             push @{$files_for_set[0]}, $pair->[0];
 | |
|             push @{$files_for_set[1]}, $pair->[1];
 | |
|         }
 | |
|     }
 | |
|     @ARGV = (1, 2); # place holders
 | |
| }
 | |
| for (my $i = 0; $i < scalar @ARGV; $i++) {
 | |
|     if ($opt_diff_list_file) {
 | |
|         push @fh, make_file_list($files_for_set[$i], $i+1,
 | |
|                                 \%Error_Codes, \@Errors, \%Ignored);
 | |
|         @{$files_for_set[$i]} = @file_list;
 | |
|     } elsif ($opt_diff_list_files) {
 | |
|         my @list_files = read_list_file($ARGV[$i]);
 | |
|         push @fh, make_file_list(\@list_files, $i+1,
 | |
|                                 \%Error_Codes, \@Errors, \%Ignored);
 | |
|         @{$files_for_set[$i]} = @file_list;
 | |
|     } else {
 | |
|         push @fh, make_file_list([ $ARGV[$i] ], $i+1,
 | |
|                                 \%Error_Codes, \@Errors, \%Ignored);
 | |
|         @{$files_for_set[$i]} = @file_list;
 | |
|     }
 | |
|     if ($opt_exclude_list_file) {
 | |
|         # note: process_exclude_list_file() references global @file_list
 | |
|         process_exclude_list_file($opt_exclude_list_file,
 | |
|                                  \%Exclude_Dir,
 | |
|                                  \%Ignored);
 | |
|     }
 | |
|     if ($opt_no_autogen) {
 | |
|         exclude_autogenerated_files(\@{$files_for_set[$i]},  # in/out
 | |
|                                     \%Error_Codes, \@Errors, \%Ignored);
 | |
|     }
 | |
|     @file_list = ();
 | |
| }
 | |
| # 1}}}
 | |
| # Step 5:  Remove duplicate files.             {{{1
 | |
| #
 | |
| my %Language           = ();
 | |
| my %unique_source_file = ();
 | |
| my $n_set = 0;
 | |
| foreach my $FH (@fh) {  # loop over each pair of file sets
 | |
|     ++$n_set;
 | |
|     remove_duplicate_files($FH,
 | |
|                                \%{$Language{$FH}}               ,
 | |
|                                \%{$unique_source_file{$FH}}     ,
 | |
|                           \%Error_Codes                         ,
 | |
|                                \@Errors                         ,
 | |
|                                \%Ignored                        );
 | |
|     if ($opt_exclude_content) {
 | |
|         exclude_by_regex($opt_exclude_content,              # in
 | |
|                         \%{$unique_source_file{$FH}},       # in/out
 | |
|                         \%Ignored);                         # out
 | |
|     } elsif ($opt_include_content) {
 | |
|         include_by_regex($opt_include_content,              # in
 | |
|                         \%{$unique_source_file{$FH}},       # in/out
 | |
|                         \%Ignored);                         # out
 | |
|     }
 | |
| 
 | |
|     if ($opt_include_lang) {
 | |
|         # remove files associated with languages not
 | |
|         # specified by --include-lang
 | |
|         my @delete_file = ();
 | |
|         foreach my $file (keys %{$unique_source_file{$FH}}) {
 | |
|             my $keep_file = 0;
 | |
|             foreach my $keep_lang (keys %Include_Language) {
 | |
|                 if (lc($Language{$FH}{$file}) eq $keep_lang) {
 | |
|                     $keep_file = 1;
 | |
|                     last;
 | |
|                 }
 | |
|             }
 | |
|             next if $keep_file;
 | |
|             push @delete_file, $file;
 | |
|         }
 | |
|         foreach my $file (@delete_file) {
 | |
|             delete $Language{$FH}{$file};
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     printf "%2d: %8d unique file%s.                          \r",
 | |
|         $n_set,
 | |
|         plural_form(scalar keys %unique_source_file)
 | |
|         unless $opt_quiet;
 | |
| }
 | |
| # 1}}}
 | |
| # Step 6:  Count code, comments, blank lines.  {{{1
 | |
| #
 | |
| my %Results_by_Language = ();
 | |
| my %Results_by_File     = ();
 | |
| my %Delta_by_Language   = ();
 | |
| my %Delta_by_File       = ();
 | |
| 
 | |
| my %alignment = ();
 | |
| 
 | |
| my $fset_a = $fh[0];
 | |
| my $fset_b = $fh[1];
 | |
| 
 | |
| my $n_filepairs_compared = 0;
 | |
| my $tot_counted = 0;
 | |
| 
 | |
| if ( scalar @fh != 2 ) {
 | |
|     print "Error: incorrect length fh array when preparing diff at step 6.\n";
 | |
|     exit 1;
 | |
| }
 | |
| if (!$opt_diff_list_file) {
 | |
|     align_by_pairs(\%{$unique_source_file{$fset_a}}      , # in
 | |
|                    \%{$unique_source_file{$fset_b}}      , # in
 | |
|                    \@files_added_tot                     , # out
 | |
|                    \@files_removed_tot                   , # out
 | |
|                    \@file_pairs_tot                      , # out
 | |
|                   );
 | |
| }
 | |
| 
 | |
| #use Data::Dumper;
 | |
| #print "added : ", Dumper(\@files_added_tot);
 | |
| #print "removed : ", Dumper(\@files_removed_tot);
 | |
| #print "pairs : ", Dumper(\@file_pairs_tot);
 | |
| 
 | |
| if ( $max_processes == 0) {
 | |
|     # Multiprocessing is disabled
 | |
|     my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
 | |
|                                \@files_removed_tot, \@file_pairs_tot,
 | |
|                                0, \%Language, \%Ignored);
 | |
|     %Results_by_File = %{$part->{'results_by_file'}};
 | |
|     %Results_by_Language= %{$part->{'results_by_language'}};
 | |
|     %Delta_by_File = %{$part->{'delta_by_file'}};
 | |
|     %Delta_by_Language= %{$part->{'delta_by_language'}};
 | |
|     %Ignored = ( %Ignored, %{$part->{'ignored'}});
 | |
|     %alignment = %{$part->{'alignment'}};
 | |
|     $n_filepairs_compared = $part->{'n_filepairs_compared'};
 | |
|     push ( @Errors, @{$part->{'errors'}});
 | |
| } else {
 | |
|     # Multiprocessing is enabled
 | |
|     # Do not create more processes than the amount of data to be processed
 | |
|     my $num_processes = min(max(scalar @files_added_tot,
 | |
|                                 scalar @files_removed_tot,
 | |
|                                 scalar @file_pairs_tot),
 | |
|                             $max_processes);
 | |
|     # ... but use at least one process.
 | |
|        $num_processes = 1
 | |
|             if $num_processes == 0;
 | |
|     # Start processes for counting
 | |
|     my $pm = Parallel::ForkManager->new($num_processes);
 | |
|     # When processes finish, they will use the embedded subroutine for
 | |
|     # merging the data into global variables.
 | |
|     $pm->run_on_finish ( sub {
 | |
|         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 | |
|         my $part_ignored = $part->{'ignored'};
 | |
|         my $part_result_by_file = $part->{'results_by_file'};
 | |
|         my $part_result_by_language = $part->{'results_by_language'};
 | |
|         my $part_delta_by_file = $part->{'delta_by_file'};
 | |
|         my $part_delta_by_language = $part->{'delta_by_language'};
 | |
|         my $part_alignment = $part->{'alignment'};
 | |
|         my $part_errors = $part->{'errors'};
 | |
|            $tot_counted += scalar keys %$part_result_by_file;
 | |
|            $n_filepairs_compared += $part->{'n_filepairs_compared'};
 | |
|         # Since files are processed by multiple processes, we can't measure
 | |
|         # the number of processed files exactly. We approximate this by showing
 | |
|         # the number of files counted by finished processes.
 | |
|         printf "Counting:  %d\r", $tot_counted
 | |
|                  if $opt_progress_rate;
 | |
| 
 | |
|         foreach my $this_language ( keys %$part_result_by_language ) {
 | |
|             my $counts = $part_result_by_language->{$this_language};
 | |
|             foreach my $inner_key ( keys %$counts ) {
 | |
|                 $Results_by_Language{$this_language}{$inner_key} +=
 | |
|                     $counts->{$inner_key};
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         foreach my $this_language ( keys %$part_delta_by_language ) {
 | |
|             my $counts = $part_delta_by_language->{$this_language};
 | |
|             foreach my $inner_key ( keys %$counts ) {
 | |
|                 my $statuses = $counts->{$inner_key};
 | |
|                 foreach my $inner_status ( keys %$statuses ) {
 | |
|                     $Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
 | |
|                           $counts->{$inner_key}->{$inner_status};
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         foreach my $label ( keys %$part_alignment ) {
 | |
|             my $inner = $part_alignment->{$label};
 | |
|             foreach my $key ( keys %$inner ) {
 | |
|                 $alignment{$label}{$key} = 1;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 | |
|         %Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
 | |
|         %Ignored = (%Ignored, %$part_ignored );
 | |
|         push ( @Errors, @$part_errors );
 | |
|     } );
 | |
| 
 | |
|     my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );
 | |
|     my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
 | |
|     my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
 | |
| 
 | |
|     while ( 1 ) {
 | |
|         my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;
 | |
|         my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
 | |
|         my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
 | |
|         if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
 | |
|              scalar @filepairs_part == 0 ) {
 | |
|             last;
 | |
|         }
 | |
| 
 | |
|         $pm->start() and next;
 | |
|         my $count_result = count_filesets ( $fset_a, $fset_b,
 | |
|             \@files_added_part, \@files_removed_part,
 | |
|             \@filepairs_part, 1, \%Language, \%Ignored );
 | |
|         $pm->finish(0 , $count_result);
 | |
|     }
 | |
|     # Wait for processes to finish
 | |
|     $pm->wait_all_children();
 | |
| }
 | |
| 
 | |
| # Write alignment data, if needed
 | |
| if ($opt_diff_alignment) {
 | |
|     write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
 | |
| }
 | |
| 
 | |
| my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
 | |
| my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
 | |
| write_file($opt_ignored, {"file_type" => "ignored",
 | |
|                           "separator" => ": ",
 | |
|                           "columns"   => ["file", "reason"],
 | |
|                          }, @ignored_reasons   ) if $opt_ignored;
 | |
| write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 | |
| # 1}}}
 | |
| # Step 7:  Assemble results.                   {{{1
 | |
| #
 | |
| my $end_time = get_time();
 | |
| printf "%8d file%s ignored.                           \n",
 | |
|     plural_form(scalar keys %Ignored) unless $opt_quiet;
 | |
| print_errors(\%Error_Codes, \@Errors) if @Errors;
 | |
| if (!%Delta_by_Language) {
 | |
|     print "Nothing to count.\n";
 | |
|     exit;
 | |
| }
 | |
| 
 | |
| if ($opt_by_file) {
 | |
|     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 | |
|                             "by file",
 | |
|                             \%Delta_by_File, \%Scale_Factor);
 | |
| } else {
 | |
|     @Lines_Out = diff_report($VERSION, get_time() - $start_time,
 | |
|                             "by language",
 | |
|                             \%Delta_by_Language, \%Scale_Factor);
 | |
| }
 | |
| 
 | |
| # 1}}}
 | |
| } else {
 | |
| # Step 4:  Separate code from non-code files.  {{{1
 | |
| my $fh = 0;
 | |
| if ($opt_list_file or $opt_diff_list_files or $opt_vcs) {
 | |
|     my @list;
 | |
|     if ($opt_vcs) {
 | |
|         @list = invoke_generator($opt_vcs, \@ARGV);
 | |
|     } elsif ($opt_list_file) {
 | |
|         @list = read_list_file($opt_list_file);
 | |
|     } else {
 | |
|         @list = read_list_file($ARGV[0]);
 | |
|     }
 | |
|     $fh = make_file_list(\@list, 0, \%Error_Codes, \@Errors, \%Ignored);
 | |
| } else {
 | |
|     $fh = make_file_list(\@ARGV, 0, \%Error_Codes, \@Errors, \%Ignored);
 | |
|     #     make_file_list populates global variable @file_list via call to
 | |
|     #     File::Find's find() which in turn calls files()
 | |
| }
 | |
| if ($opt_exclude_list_file) {
 | |
|     # note: process_exclude_list_file() references global @file_list
 | |
|     process_exclude_list_file($opt_exclude_list_file,
 | |
|                              \%Exclude_Dir,
 | |
|                              \%Ignored);
 | |
| }
 | |
| if ($opt_skip_win_hidden and $ON_WINDOWS) {
 | |
|     my @file_list_minus_hidden = ();
 | |
|     # eval code to run on Unix without 'missing Win32::File module' error.
 | |
|     my $win32_file_invocation = '
 | |
|         use Win32::File;
 | |
|         foreach my $F (@file_list) {
 | |
|             my $attr = undef;
 | |
|             Win32::File::GetAttributes($F, $attr);
 | |
|             if ($attr & HIDDEN) {
 | |
|                 $Ignored{$F} = "Windows hidden file";
 | |
|                 print "Ignoring $F since it is a Windows hidden file\n"
 | |
|                     if $opt_v > 1;
 | |
|             } else {
 | |
|                 push @file_list_minus_hidden, $F;
 | |
|             }
 | |
|         }';
 | |
|     eval $win32_file_invocation;
 | |
|     @file_list = @file_list_minus_hidden;
 | |
| }
 | |
| if ($opt_no_autogen) {
 | |
|     exclude_autogenerated_files(\@file_list,  # in/out
 | |
|                                 \%Error_Codes, \@Errors, \%Ignored);
 | |
| }
 | |
| #printf "%8d file%s excluded.                     \n",
 | |
| #   plural_form(scalar keys %Ignored)
 | |
| #   unless $opt_quiet;
 | |
| # die print ": ", join("\n: ", @file_list), "\n";
 | |
| # 1}}}
 | |
| # Step 5:  Remove duplicate files.             {{{1
 | |
| #
 | |
| my %Language           = ();
 | |
| my %unique_source_file = ();
 | |
| remove_duplicate_files($fh                          ,   # in
 | |
|                            \%Language               ,   # out
 | |
|                            \%unique_source_file     ,   # out
 | |
|                       \%Error_Codes                 ,   # in
 | |
|                            \@Errors                 ,   # out
 | |
|                            \%Ignored                );  # out
 | |
| if ($opt_exclude_content) {
 | |
|     exclude_by_regex($opt_exclude_content,              # in
 | |
|                     \%unique_source_file ,              # in/out
 | |
|                     \%Ignored);                         # out
 | |
| } elsif ($opt_include_content) {
 | |
|     include_by_regex($opt_include_content,              # in
 | |
|                     \%unique_source_file ,              # in/out
 | |
|                     \%Ignored);                         # out
 | |
| }
 | |
| printf "%8d unique file%s.                              \n",
 | |
|     plural_form(scalar keys %unique_source_file)
 | |
|     unless $opt_quiet;
 | |
| # 1}}}
 | |
| # Step 6:  Count code, comments, blank lines.  {{{1
 | |
| #
 | |
| my %Results_by_Language = ();
 | |
| my %Results_by_File     = ();
 | |
| my @results_parts  = ();
 | |
| my @sorted_files = sort keys %unique_source_file;
 | |
| 
 | |
| if ( $max_processes == 0) {
 | |
|     # Multiprocessing is disabled
 | |
|     my $part = count_files ( \@sorted_files , 0, \%Language);
 | |
|     %Results_by_File = %{$part->{'results_by_file'}};
 | |
|     %Results_by_Language= %{$part->{'results_by_language'}};
 | |
|     %Ignored = ( %Ignored, %{$part->{'ignored'}});
 | |
|     push ( @Errors, @{$part->{'errors'}});
 | |
| } else {
 | |
|     # Do not create more processes than the number of files to be processed
 | |
|     my $num_files = scalar @sorted_files;
 | |
|     my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
 | |
|     # Use at least one process.
 | |
|        $num_processes = 1
 | |
|             if $num_processes == 0;
 | |
|     # Start processes for counting
 | |
|     my $pm = Parallel::ForkManager->new($num_processes);
 | |
|     # When processes finish, they will use the embedded subroutine for
 | |
|     # merging the data into global variables.
 | |
|     $pm->run_on_finish ( sub {
 | |
|         my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
 | |
|         my $part_ignored = $part->{'ignored'};
 | |
|         my $part_result_by_file = $part->{'results_by_file'};
 | |
|         my $part_result_by_language = $part->{'results_by_language'};
 | |
|         my $part_errors = $part->{'errors'};
 | |
|         my $nCounted+= scalar keys %$part_result_by_file;
 | |
|         # Since files are processed by multiple processes, we can't measure
 | |
|         # the number of processed files exactly. We approximate this by showing
 | |
|         # the number of files counted by finished processes.
 | |
|         printf "Counting:  %d\r", $nCounted
 | |
|                  if $opt_progress_rate;
 | |
| 
 | |
|         foreach my $this_language ( keys %$part_result_by_language ) {
 | |
|             my $counts = $part_result_by_language->{$this_language};
 | |
|             foreach my $inner_key ( keys %$counts ) {
 | |
|                 $Results_by_Language{$this_language}{$inner_key} +=
 | |
|                     $counts->{$inner_key};
 | |
|             }
 | |
|         }
 | |
|         %Results_by_File = ( %Results_by_File, %$part_result_by_file );
 | |
|         %Ignored = (%Ignored, %$part_ignored);
 | |
|         push ( @Errors, @$part_errors);
 | |
|     } );
 | |
|     my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
 | |
|     while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
 | |
|         $pm->start() and next;
 | |
|         my $count_result = count_files ( \@part, 1, \%Language );
 | |
|         $pm->finish(0 , $count_result);
 | |
|     }
 | |
|     # Wait for processes to finish
 | |
|     $pm->wait_all_children();
 | |
| }
 | |
| 
 | |
| my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
 | |
| my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
 | |
| write_file($opt_ignored, {"file_type" => "ignored",
 | |
|                           "separator" => $separator,
 | |
|                           "columns"   => ["file", "reason"],
 | |
|                          }, @ignored_reasons   ) if $opt_ignored;
 | |
| if ($opt_summary_cutoff) {
 | |
|     %Results_by_Language = apply_cutoff($opt_summary_cutoff,
 | |
|                                        \%Results_by_Language);
 | |
| }
 | |
| write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
 | |
| # 1}}}
 | |
| # Step 7:  Assemble results.                   {{{1
 | |
| #
 | |
| my $end_time = get_time();
 | |
| printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
 | |
|     unless $opt_quiet;
 | |
| print_errors(\%Error_Codes, \@Errors) if @Errors;
 | |
| if (!%Results_by_Language) {
 | |
|     write_null_results($opt_json, $opt_xml, $opt_report_file);
 | |
|     exit;
 | |
| }
 | |
| 
 | |
| generate_sql($end_time - $start_time,
 | |
|             \%Results_by_File, \%Scale_Factor) if $opt_sql;
 | |
| 
 | |
| exit if $skip_generate_report;
 | |
| if      ($opt_by_file_by_lang) {
 | |
|     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | |
|                                       "by file",
 | |
|                                       \%Results_by_File,    \%Scale_Factor);
 | |
|     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | |
|                                       "by language",
 | |
|                                       \%Results_by_Language, \%Scale_Factor);
 | |
| } elsif ($opt_by_file) {
 | |
|     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | |
|                                       "by file",
 | |
|                                       \%Results_by_File,    \%Scale_Factor);
 | |
| } else {
 | |
|     push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
 | |
|                                       "by language",
 | |
|                                       \%Results_by_Language, \%Scale_Factor);
 | |
| }
 | |
| # 1}}}
 | |
| }
 | |
| if ($opt_fmt) {
 | |
|     my $json_string = "";
 | |
|     write_file(\$json_string, {}, @Lines_Out);
 | |
|     my ($file_len, $lang_len, $header, %contents) = load_json($json_string);
 | |
|     @Lines_Out = print_format_n(abs($opt_fmt), $file_len, $lang_len, $header, %contents);
 | |
| }
 | |
| if ($opt_report_file) {
 | |
|     write_file($opt_report_file, {}, @Lines_Out);
 | |
| } else {
 | |
|     if ($opt_fmt) {
 | |
|         print "@Lines_Out";
 | |
|     } else {
 | |
|         print "\n" unless $opt_quiet;
 | |
|         print join("\n", @Lines_Out), "\n";
 | |
|     }
 | |
| }
 | |
| if ($opt_count_diff) {
 | |
|     ++$opt_count_diff;
 | |
|     exit if $opt_count_diff > 3;
 | |
|     goto Top_of_Processing_Loop;
 | |
| }
 | |
| sub summary_cutoff_error {                   # {{{
 | |
|     my ($parameter) = @_;
 | |
|     print "-> summary_cutoff_is_ok($parameter)\n" if $opt_v > 2;
 | |
|     my %known_keys = ( 'c' => 1, 'f' => 1, 'm' => 1, 'cm' => 1 );
 | |
|     my $result = "";
 | |
|     my $by_pct = 0;
 | |
|     my ($key, $value);
 | |
|     if ($parameter !~ /:/) {
 | |
|         $result = "expected a colon in --summary-cutoff argument";
 | |
|     } else {
 | |
|         ($key, $value) = split(':', $parameter, 2);
 | |
|         if ($value =~ /%$/) {
 | |
|             $by_pct = 1;
 | |
|             $value =~ s/%$//;
 | |
|         }
 | |
|         if (!$known_keys{$key}) {
 | |
|             $result = "--summary-cutoff argument:  '$key' is not 'c', 'f', 'm' or 'cm'";
 | |
|         }
 | |
|         if ($value !~ /^\d+(\.\d*)?$/) {
 | |
|             $result = "--summary-cutoff argument:  '$value' is not a number";
 | |
|         }
 | |
|     }
 | |
|     print "<- summary_cutoff_is_ok($result)\n" if $opt_v > 2;
 | |
|     return $result;
 | |
| } # 1}}}
 | |
| sub apply_cutoff {                           # {{{1
 | |
|     my ($criterion,
 | |
|         $rhh_by_lang) = @_;
 | |
| 
 | |
|     my %aggregated_Results_by_Language = ();
 | |
|     my $by_pct = 0;
 | |
|     my ($key, $value) = split(':', $criterion, 2);
 | |
|     if ($value =~ /%$/) {
 | |
|         $by_pct = 1;
 | |
|         $value =~ s/%$//;
 | |
|     }
 | |
| 
 | |
|     my %sum = ();
 | |
|     if ($by_pct) {
 | |
|         foreach my $lang (keys %{$rhh_by_lang}) {
 | |
|             foreach my $category (qw(nFiles comment blank code)) {
 | |
|                 $sum{$category} += $rhh_by_lang->{$lang}{$category};
 | |
|             }
 | |
|         }
 | |
|         if      ($key eq 'c') {
 | |
|             $value *= $sum{'code'}/100;
 | |
|         } elsif ($key eq 'f') {
 | |
|             $value *= $sum{'nFiles'}/100;
 | |
|         } elsif ($key eq 'm') {
 | |
|             $value *= $sum{'comment'}/100;
 | |
|         } elsif ($key eq 'cm') {
 | |
|             $value *= ($sum{'code'} + $sum{'comment'})/100;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     foreach my $lang (keys %{$rhh_by_lang}) {
 | |
|         my %sum = ();
 | |
|         my $agg_lang = $lang;
 | |
|         if      ($key eq 'c') {
 | |
|             $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'}    <= $value;
 | |
|         } elsif ($key eq 'f') {
 | |
|             $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'nFiles'}  <= $value;
 | |
|         } elsif ($key eq 'm') {
 | |
|             $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'comment'} <= $value;
 | |
|         } elsif ($key eq 'cm') {
 | |
|             $agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'} +
 | |
|                                       $rhh_by_lang->{$lang}{'comment'} <= $value;
 | |
|         }
 | |
|         foreach my $category (qw(nFiles comment blank code)) {
 | |
|             $aggregated_Results_by_Language{$agg_lang}{$category} +=
 | |
|                 $rhh_by_lang->{$lang}{$category};
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return %aggregated_Results_by_Language;
 | |
| } # 1}}}
 | |
| sub exclude_by_regex {                       # {{{1
 | |
|     my ($regex,
 | |
|         $rh_unique_source_file, # in/out
 | |
|         $rh_ignored           , # out
 | |
|        ) = @_;
 | |
|     my @exclude = ();
 | |
|     foreach my $file (keys %{$rh_unique_source_file}) {
 | |
|         my $line_num = 0;
 | |
|         foreach my $line (read_file($file)) {
 | |
|             ++$line_num;
 | |
|             if ($line =~ /$regex/) {
 | |
|                 $rh_ignored->{$file} = "line $line_num match for --exclude-content=$regex";
 | |
|                 push @exclude, $file;
 | |
|                 last;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     foreach my $file (@exclude) {
 | |
|         delete $rh_unique_source_file->{$file};
 | |
|     }
 | |
| } # 1}}}
 | |
| sub include_by_regex {                       # {{{1
 | |
|     my ($regex,
 | |
|         $rh_unique_source_file, # in/out
 | |
|         $rh_ignored           , # out
 | |
|        ) = @_;
 | |
|     my @exclude = ();
 | |
|     foreach my $file (keys %{$rh_unique_source_file}) {
 | |
|         my $keep_this_one = 0;
 | |
|         foreach my $line (read_file($file)) {
 | |
|             if ($line =~ /$regex/) {
 | |
|                 $keep_this_one = 1;
 | |
|                 last;
 | |
|             }
 | |
|         }
 | |
|         if (!$keep_this_one) {
 | |
|             $rh_ignored->{$file} = "does not satisfy --include-content=$regex";
 | |
|             push @exclude, $file;
 | |
|         }
 | |
|     }
 | |
|     foreach my $file (@exclude) {
 | |
|         delete $rh_unique_source_file->{$file};
 | |
|     }
 | |
| } # 1}}}
 | |
| sub get_max_processes {                      # {{{1
 | |
|     # If user has specified valid number of processes, use that.
 | |
|     if (defined $opt_processes) {
 | |
|         eval "use Parallel::ForkManager 0.7.6;";
 | |
|         if ( defined $Parallel::ForkManager::VERSION ) {
 | |
|             $HAVE_Parallel_ForkManager = 1;
 | |
|         }
 | |
|         if ( $opt_processes !~ /^\d+$/ ) {
 | |
|             print "Error: processes option argument must be numeric.\n";
 | |
|             exit 1;
 | |
|         }
 | |
|         elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
 | |
|             print "Error: cannot use multiple processes, because " .
 | |
|                   "Parallel::ForkManager is not installed, or the version is too old.\n";
 | |
|             exit 1;
 | |
|         }
 | |
|     elsif ( $opt_processes >0 and $ON_WINDOWS ) {
 | |
|             print "Error: cannot use multiple processes on Windows systems.\n";
 | |
|             exit 1;
 | |
|         }
 | |
|         else {
 | |
|             return $opt_processes;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Disable multiprocessing on Windows - does not work reliably
 | |
|     if ($ON_WINDOWS) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     # Disable multiprocessing if Parallel::ForkManager is not available
 | |
|     if ( ! $HAVE_Parallel_ForkManager ) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     # Set to number of cores on Linux
 | |
|     if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
 | |
|         my $numavcores_linux = `/usr/bin/nproc`;
 | |
|         chomp $numavcores_linux;
 | |
|         if ( $numavcores_linux =~ /^\d+$/ ) {
 | |
|             return $numavcores_linux;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Set to number of cores on macOS
 | |
|     if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
 | |
|        my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
 | |
|        chomp $numavcores_macos;
 | |
|        if ($numavcores_macos =~ /^\d+$/ ) {
 | |
|            return $numavcores_macos;
 | |
|        }
 | |
|     }
 | |
| 
 | |
|     # Disable multiprocessing in other cases
 | |
|     return 0;
 | |
| } # 1}}}
 | |
| sub exclude_autogenerated_files {            # {{{1
 | |
|     my ($ra_file_list, # in/out
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rh_Ignored  , # out
 | |
|        ) = @_;
 | |
|     print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
 | |
|     my @file_list_minus_autogen = ();
 | |
|     foreach my $file (@{$ra_file_list}) {
 | |
|         if ($file !~ /\.go$/ && $file !~ /\.ʕ◔ϖ◔ʔ$/) {
 | |
|             # at the moment, only know Go autogenerated files
 | |
|             push @file_list_minus_autogen, $file;
 | |
|             next;
 | |
|         }
 | |
|         my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
 | |
|         if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
 | |
|             $rh_Ignored->{$file} = 'Go autogenerated file';
 | |
|         } else {
 | |
|             # Go, but not autogenerated
 | |
|             push @file_list_minus_autogen, $file;
 | |
|         }
 | |
|     }
 | |
|     @{$ra_file_list} = @file_list_minus_autogen;
 | |
| 
 | |
|     if ($opt_force_git) {
 | |
|         my $repo_dir = git_root_dir();
 | |
|         my @file_list_minus_linguist = ();
 | |
|         # if there's a .gitattributes file, look for linguist-generated
 | |
|         # and linguist-vendored entries to ignore
 | |
|         my $GA = ".gitattributes";
 | |
|         if (-f $GA) {
 | |
|             foreach my $line (read_file($GA)) {
 | |
|                 next unless $line =~ /^(.*?)\s+(linguist-(vendored|generated))/;
 | |
|                 my $re = glob2regex($1);
 | |
|                 foreach my $file (@{$ra_file_list}) {
 | |
|                     my $full_path = File::Spec->catfile($repo_dir, $file);
 | |
|                     my $rel_file  = File::Spec->abs2rel($full_path, $cwd);
 | |
|                     my $match = undef;
 | |
|                     if ($ON_WINDOWS) {
 | |
|                         $rel_file =~ s{\\}{/}g;
 | |
|                         $match = $rel_file =~ m{$re}i;
 | |
|                     } else {
 | |
|                         $match = $rel_file =~ m{$re};
 | |
|                     }
 | |
|                     if ($match) {
 | |
| #print "RULE [$rel_file] v [$re]\n";
 | |
|                         $rh_Ignored->{$file} = "matches $GA rule '$line'";
 | |
|                     } else {
 | |
|                         push @file_list_minus_linguist, $file;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub git_root_dir {                           # {{{1
 | |
|     # if in a git repo, return the repo's top level directory
 | |
|     my $cmd = "git rev-parse --show-toplevel";
 | |
|     print $cmd, "\n" if $opt_v > 1;
 | |
|     my $dir = undef;
 | |
|     chomp($dir = `$cmd`);
 | |
|     die "Not in a git repository" unless $dir
 | |
| } # 1}}}
 | |
| sub file_extension {                         # {{{1
 | |
|     my ($fname, ) = @_;
 | |
|     $fname =~ m/\.(\w+)$/;
 | |
|     if ($1) {
 | |
|         return $1;
 | |
|     } else {
 | |
|         return "";
 | |
|     }
 | |
| } # 1}}}
 | |
| sub count_files {                            # {{{1
 | |
|     my ($filelist, $counter_type, $language_hash) = @_;
 | |
|     print "-> count_files()\n" if $opt_v > 2;
 | |
|     my @p_errors = ();
 | |
|     my %p_ignored = ();
 | |
|     my %p_rbl = ();
 | |
|     my %p_rbf = ();
 | |
|     my %Language = %{$language_hash};
 | |
| 
 | |
|     foreach my $file (@$filelist) {
 | |
|         if ( ! $counter_type ) {
 | |
|             # Multithreading disabled
 | |
|             $nCounted++;
 | |
| 
 | |
|             printf "Counting:  %d\r", $nCounted
 | |
|                  unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
 | |
|         }
 | |
| 
 | |
|         next if $Ignored{$file};
 | |
|         if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
 | |
|             $p_ignored{$file} = "not in --include-ext=$opt_include_ext";
 | |
|             next;
 | |
|         }
 | |
|         if ($opt_include_lang and not $Include_Language{lc($Language{$file})}) {
 | |
|             $p_ignored{$file} = "not in --include-lang=$opt_include_lang";
 | |
|             next;
 | |
|         }
 | |
|         if ($Exclude_Language{$Language{$file}}) {
 | |
|             $p_ignored{$file} = "--exclude-lang=$Language{$file}";
 | |
|             next;
 | |
|         }
 | |
|         if ($opt_force_lang_def and ($Language{$file} eq "XML") and
 | |
|             !defined $Filters_by_Language{XML}) {
 | |
|             # XML check is attempted for all unidentified text files.
 | |
|             # This can't be done if user forces language definition
 | |
|             # that excludes XML.  GH #596
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
 | |
|         if ($Filters_by_Language_Language_file) {
 | |
|             if ($Language{$file} eq "(unknown)") {
 | |
|                 $p_ignored{$file} = "language unknown (#1)";
 | |
|             } else {
 | |
|                 $p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
 | |
|             }
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my ($all_line_count, $blank_count, $comment_count, $code_count) = (0, 0, 0, 0);
 | |
|         if (!$opt_only_count_files) {
 | |
|             if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
 | |
|                 chomp ($blank_count     = `grep -cv \"[^[:space:]]\" '$file'`);
 | |
|                 chomp ($all_line_count  = `cat '$file' | wc -l`);
 | |
|                 if      ($Language{$file} =~ /^(C|C\+\+)$/) {
 | |
|                     $code_count = `cat '$file' | c_count      | head -n 1`;
 | |
|                 } elsif ($Language{$file} eq "XML") {
 | |
|                     $code_count = `cat '$file' | xml_count    | head -n 1`;
 | |
|                 } elsif ($Language{$file} eq "PHP") {
 | |
|                     $code_count = `cat '$file' | php_count    | head -n 1`;
 | |
|                 } elsif ($Language{$file} eq "Pascal") {
 | |
|                     $code_count = `cat '$file' | pascal_count | head -n 1`;
 | |
|                 } elsif ($Language{$file} eq "Java") {
 | |
|                     $code_count = `cat '$file' | java_count   | head -n 1`;
 | |
|                 } else {
 | |
|                     die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
 | |
|                 }
 | |
|                 $code_count = substr($code_count, 0, -2);
 | |
|                 $comment_count = $all_line_count - $code_count - $blank_count;
 | |
|             } else {
 | |
|                 ($all_line_count,
 | |
|                 $blank_count   ,
 | |
|                 $comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
 | |
|                 $code_count = $all_line_count - $blank_count - $comment_count;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if ($opt_by_file) {
 | |
|             $p_rbf{$file}{'code'   } = $code_count     ;
 | |
|             $p_rbf{$file}{'blank'  } = $blank_count    ;
 | |
|             $p_rbf{$file}{'comment'} = $comment_count  ;
 | |
|             $p_rbf{$file}{'lang'   } = $Language{$file};
 | |
|             $p_rbf{$file}{'nFiles' } = 1;
 | |
|         } else {
 | |
|             $p_rbf{$file} = 1;  # just keep track of counted files
 | |
|         }
 | |
| 
 | |
|         $p_rbl{$Language{$file}}{'nFiles'}++;
 | |
|         $p_rbl{$Language{$file}}{'code'}    += $code_count   ;
 | |
|         $p_rbl{$Language{$file}}{'blank'}   += $blank_count  ;
 | |
|         $p_rbl{$Language{$file}}{'comment'} += $comment_count;
 | |
| 
 | |
|     }
 | |
|     print "<- count_files()\n" if $opt_v > 2;
 | |
|     return {
 | |
|         "ignored" => \%p_ignored,
 | |
|         "errors"  => \@p_errors,
 | |
|         "results_by_file" => \%p_rbf,
 | |
|         "results_by_language" => \%p_rbl,
 | |
|     }
 | |
| } # 1}}}
 | |
| sub count_filesets {                         # {{{1
 | |
|     my ($fset_a,
 | |
|         $fset_b,
 | |
|         $files_added,
 | |
|         $files_removed,
 | |
|         $file_pairs,
 | |
|         $counter_type,
 | |
|         $language_hash,
 | |
|         $rh_Ignored) = @_;
 | |
|     print "-> count_filesets()\n" if $opt_v > 2;
 | |
|     my @p_errors = ();
 | |
|     my %p_alignment = ();
 | |
|     my %p_ignored = ();
 | |
|     my %p_rbl = ();
 | |
|     my %p_rbf = ();
 | |
|     my %p_dbl = ();
 | |
|     my %p_dbf = ();
 | |
|     my %Language = %$language_hash;
 | |
| 
 | |
|     my $nCounted = 0;
 | |
| 
 | |
|     my %already_counted = (); # already_counted{ filename } = 1
 | |
| 
 | |
|     if (!@$file_pairs) {
 | |
|         # Special case where all files were either added or deleted.
 | |
|         # In this case, one of these arrays will be empty:
 | |
|         #   @files_added, @files_removed
 | |
|         # so loop over both to cover both cases.
 | |
|         my $status = @$files_added ? 'added' : 'removed';
 | |
|         my $fset = @$files_added ? $fset_b : $fset_a;
 | |
|         foreach my $file (@$files_added, @$files_removed) {
 | |
|             next unless defined $Language{$fset}{$file};
 | |
|             my $Lang = $Language{$fset}{$file};
 | |
|             next if $Lang eq '(unknown)';
 | |
|             my ($all_line_count,
 | |
|                 $blank_count   ,
 | |
|                 $comment_count ,
 | |
|                 ) = call_counter($file, $Lang, \@p_errors);
 | |
|             $already_counted{$file} = 1;
 | |
|             my $code_count = $all_line_count-$blank_count-$comment_count;
 | |
|             if ($opt_by_file) {
 | |
|                 $p_dbf{$file}{'code'   }{$status} += $code_count   ;
 | |
|                 $p_dbf{$file}{'blank'  }{$status} += $blank_count  ;
 | |
|                 $p_dbf{$file}{'comment'}{$status} += $comment_count;
 | |
|                 $p_dbf{$file}{'lang'   }{$status}  = $Lang         ;
 | |
|                 $p_dbf{$file}{'nFiles' }{$status} += 1             ;
 | |
|             }
 | |
|             $p_dbl{$Lang}{'code'   }{$status} += $code_count   ;
 | |
|             $p_dbl{$Lang}{'blank'  }{$status} += $blank_count  ;
 | |
|             $p_dbl{$Lang}{'comment'}{$status} += $comment_count;
 | |
|             $p_dbl{$Lang}{'nFiles' }{$status} += 1             ;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     #use Data::Dumper::Simple;
 | |
|     #use Data::Dumper;
 | |
|     #print Dumper(\@files_added, \@files_removed, \@file_pairs);
 | |
|     #print "after align_by_pairs:\n";
 | |
|     #print "added:\n";
 | |
| 
 | |
|     foreach my $f (@$files_added) {
 | |
|         next if $already_counted{$f};
 | |
|         #printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
 | |
|         # Don't proceed unless the file (both L and R versions)
 | |
|         # is in a known language.
 | |
|         next if $opt_include_ext
 | |
|             and not $Include_Ext{ file_extension($f) };
 | |
|         if (!defined $Language{$fset_b}{$f}) {
 | |
|             $p_ignored{$f} = "excluded or unknown language";
 | |
|             next;
 | |
|         }
 | |
|         next if $opt_include_lang
 | |
|             and not $Include_Language{lc($Language{$fset_b}{$f})};
 | |
|         my $this_lang = $Language{$fset_b}{$f};
 | |
|         if (!defined  $Language{$fset_b}{$f}) {
 | |
|             # shouldn't happen but could get here if using
 | |
|             # --diff-list-file which bypasses earlier checks
 | |
|             $p_ignored{$f} = "empty or uncharacterizeable file";
 | |
|             next;
 | |
|         }
 | |
|         if ($this_lang eq "(unknown)") {
 | |
|             $p_ignored{$f} = "unknown language";
 | |
|             next;
 | |
|         }
 | |
|         if ($Exclude_Language{$this_lang}) {
 | |
|             $p_ignored{$f} = "--exclude-lang=$this_lang";
 | |
|             next;
 | |
|         }
 | |
|         $p_alignment{"added"}{sprintf "  + %s ; %s\n", $f, $this_lang} = 1;
 | |
|         ++$p_dbl{ $this_lang }{'nFiles'}{'added'};
 | |
|         # Additionally, add contents of file $f to
 | |
|         # Delta_by_File{$f}{comment/blank/code}{'added'}
 | |
|         # Delta_by_Language{$lang}{comment/blank/code}{'added'}
 | |
|         # via the $p_dbl and $p_dbf variables.
 | |
|         my ($all_line_count,
 | |
|             $blank_count   ,
 | |
|             $comment_count ,
 | |
|            ) = call_counter($f, $this_lang, \@p_errors);
 | |
|         $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
 | |
|         $p_dbl{ $this_lang }{'blank'}{'added'}   += $blank_count;
 | |
|         $p_dbl{ $this_lang }{'code'}{'added'}    +=
 | |
|            $all_line_count - $blank_count - $comment_count;
 | |
|         $p_dbf{ $f }{'comment'}{'added'} = $comment_count;
 | |
|         $p_dbf{ $f }{'blank'}{'added'}   = $blank_count;
 | |
|         $p_dbf{ $f }{'code'}{'added'}    =
 | |
|            $all_line_count - $blank_count - $comment_count;
 | |
|     }
 | |
| 
 | |
|     #print "removed:\n";
 | |
|     foreach my $f (@$files_removed) {
 | |
|         next if $already_counted{$f};
 | |
|         # Don't proceed unless the file (both L and R versions)
 | |
|         # is in a known language.
 | |
|         next if $opt_include_ext
 | |
|             and not $Include_Ext{ file_extension($f) };
 | |
|         next if $opt_include_lang
 | |
|             and (not defined $Language{$fset_a}{$f}
 | |
|              or  not defined $Include_Language{lc($Language{$fset_a}{$f})});
 | |
|         my $this_lang = $Language{$fset_a}{$f};
 | |
|         if ((not defined $this_lang) or ($this_lang eq "(unknown)")) {
 | |
|             $p_ignored{$f} = "unknown language";
 | |
|             next;
 | |
|         }
 | |
|         if ($Exclude_Language{$this_lang}) {
 | |
|             $p_ignored{$f} = "--exclude-lang=$this_lang";
 | |
|             next;
 | |
|         }
 | |
|         ++$p_dbl{ $this_lang }{'nFiles'}{'removed'};
 | |
|         $p_alignment{"removed"}{sprintf "  - %s ; %s\n", $f, $this_lang} = 1;
 | |
|         #printf "%10s -> %s\n", $f, $Language{$fh[$F  ]}{$f};
 | |
|         # Additionally, add contents of file $f to
 | |
|         #        Delta_by_File{$f}{comment/blank/code}{'removed'}
 | |
|         #        Delta_by_Language{$lang}{comment/blank/code}{'removed'}
 | |
|         # via the $p_dbl and $p_dbf variables.
 | |
|         my ($all_line_count,
 | |
|             $blank_count   ,
 | |
|             $comment_count ,
 | |
|            ) = call_counter($f, $this_lang, \@p_errors);
 | |
|         $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
 | |
|         $p_dbl{ $this_lang}{'blank'}{'removed'}   += $blank_count;
 | |
|         $p_dbl{ $this_lang}{'code'}{'removed'}    +=
 | |
|              $all_line_count - $blank_count - $comment_count;
 | |
|         $p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
 | |
|         $p_dbf{ $f }{'blank'}{'removed'}   = $blank_count;
 | |
|         $p_dbf{ $f }{'code'}{'removed'}    =
 | |
|             $all_line_count - $blank_count - $comment_count;
 | |
|     }
 | |
| 
 | |
|     my $n_file_pairs_compared = 0;
 | |
|     # Don't know ahead of time how many file pairs will be compared
 | |
|     # since duplicates are weeded out below.  The answer is
 | |
|     # scalar @file_pairs only if there are no duplicates.
 | |
| 
 | |
|     foreach my $pair (@$file_pairs) {
 | |
|         my $file_L = $pair->[0];
 | |
|         my $file_R = $pair->[1];
 | |
|         my $Lang_L = $Language{$fset_a}{$file_L};
 | |
|         my $Lang_R = $Language{$fset_b}{$file_R};
 | |
|         if (!defined($Lang_L) or !defined($Lang_R)) {
 | |
|             print " -> count_filesets skipping $file_L, $file_R ",
 | |
|                   "because language cannot be inferred\n" if $opt_v;
 | |
|             next;
 | |
|         }
 | |
|         #print "main step 6 file_L=$file_L    file_R=$file_R\n";
 | |
|         ++$nCounted;
 | |
|         printf "Counting:  %d\r", $nCounted
 | |
|              unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
 | |
|         next if $p_ignored{$file_L} or $p_ignored{$file_R};
 | |
| 
 | |
|         # filter out non-included extensions
 | |
|         if ($opt_include_ext  and not $Include_Ext{ file_extension($file_L) }
 | |
|                               and not $Include_Ext{ file_extension($file_R) }) {
 | |
|             $p_ignored{$file_L} = "not in --include-ext=$opt_include_ext";
 | |
|             $p_ignored{$file_R} = "not in --include-ext=$opt_include_ext";
 | |
|             next;
 | |
|         }
 | |
|         # filter out non-included languages
 | |
|         if ($opt_include_lang and not $Include_Language{lc($Lang_L)}
 | |
|                               and not $Include_Language{lc($Lang_R)}) {
 | |
|             $p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
 | |
|             $p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
 | |
|             next;
 | |
|         }
 | |
|         # filter out excluded or unrecognized languages
 | |
|         if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
 | |
|             $p_ignored{$file_L} = "--exclude-lang=$Lang_L";
 | |
|             $p_ignored{$file_R} = "--exclude-lang=$Lang_R";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $not_Filters_by_Language_Lang_LR = 0;
 | |
|         #print "file_LR = [$file_L] [$file_R]\n";
 | |
|         #print "Lang_LR = [$Lang_L] [$Lang_R]\n";
 | |
|         if (($Lang_L eq "(unknown)") or
 | |
|             ($Lang_R eq "(unknown)") or
 | |
|             !(@{$Filters_by_Language{$Lang_L} }) or
 | |
|             !(@{$Filters_by_Language{$Lang_R} })) {
 | |
|             $not_Filters_by_Language_Lang_LR = 1;
 | |
|         }
 | |
|         if ($not_Filters_by_Language_Lang_LR) {
 | |
|             if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
 | |
|                 $p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
 | |
|                 $p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
 | |
|             } else {
 | |
|                 $p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
 | |
|                 $p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
 | |
|             }
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         # filter out explicitly excluded files
 | |
|         if ($opt_exclude_list_file and
 | |
|             ($rh_Ignored->{$file_L} or $rh_Ignored->{$file_R})) {
 | |
|             my $msg_2;
 | |
|             if ($rh_Ignored->{$file_L}) {
 | |
|                 $msg_2 = "$file_L (paired to $file_R)";
 | |
|             } else {
 | |
|                 $msg_2 = "$file_R (paired to $file_L)";
 | |
|             }
 | |
|             my $msg_1 = "in --exclude-list-file=$opt_exclude_list_file";
 | |
|             $p_ignored{$file_L} = "$msg_1, $msg_2";
 | |
|             $p_ignored{$file_R} = "$msg_1, $msg_2";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         #print "DIFF($file_L, $file_R)\n";
 | |
|         # step 0: compare the two files' contents
 | |
|         chomp ( my @lines_L = read_file($file_L) );
 | |
|         chomp ( my @lines_R = read_file($file_R) );
 | |
|         my $language_file_L = "";
 | |
|         if (defined $Language{$fset_a}{$file_L}) {
 | |
|             $language_file_L = $Language{$fset_a}{$file_L};
 | |
|         } else {
 | |
|             # files $file_L and $file_R do not contain known language
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $contents_are_same = 1;
 | |
|         if (scalar @lines_L == scalar @lines_R) {
 | |
|             # same size, must compare line-by-line
 | |
|             for (my $i = 0; $i < scalar @lines_L; $i++) {
 | |
|                if ($lines_L[$i] ne $lines_R[$i]) {
 | |
|                    $contents_are_same = 0;
 | |
|                    last;
 | |
|                }
 | |
|             }
 | |
|             if ($contents_are_same) {
 | |
|                 ++$p_dbl{$language_file_L}{'nFiles'}{'same'};
 | |
|             } else {
 | |
|                 ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 | |
|             }
 | |
|         } else {
 | |
|             $contents_are_same = 0;
 | |
|             # different sizes, contents have changed
 | |
|             ++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
 | |
|         }
 | |
| 
 | |
|         if ($opt_diff_alignment) {
 | |
|             my $str =  "$file_L | $file_R ; $language_file_L";
 | |
|             if ($contents_are_same) {
 | |
|                 $p_alignment{"pairs"}{"  == $str"} = 1;
 | |
|             } else {
 | |
|                 $p_alignment{"pairs"}{"  != $str"} = 1;
 | |
|             }
 | |
|             ++$n_file_pairs_compared;
 | |
|         }
 | |
| 
 | |
|         my ($all_line_count_L, $blank_count_L   , $comment_count_L ,
 | |
|             $all_line_count_R, $blank_count_R   , $comment_count_R , )  = (0,0,0,0,0,0,);
 | |
|         if (!$contents_are_same) {
 | |
|             # step 1: identify comments in both files
 | |
|             #print "Diff blank removal L language= $Lang_L";
 | |
|             #print " scalar(lines_L)=", scalar @lines_L, "\n";
 | |
|             my @original_minus_blanks_L
 | |
|                     = rm_blanks(  \@lines_L, $Lang_L, \%EOL_Continuation_re);
 | |
|             #print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
 | |
|             @lines_L    = @original_minus_blanks_L;
 | |
|             #print "2: scalar(lines_L)=", scalar @lines_L, "\n";
 | |
|             @lines_L    = add_newlines(\@lines_L); # compensate for rm_comments()
 | |
|             @lines_L    = rm_comments( \@lines_L, $Lang_L, $file_L,
 | |
|                                        \%EOL_Continuation_re);
 | |
|             #print "3: scalar(lines_L)=", scalar @lines_L, "\n";
 | |
| 
 | |
|             #print "Diff blank removal R language= $Lang_R\n";
 | |
|             my @original_minus_blanks_R
 | |
|                     = rm_blanks(  \@lines_R, $Lang_R, \%EOL_Continuation_re);
 | |
|             @lines_R    = @original_minus_blanks_R;
 | |
|             @lines_R    = add_newlines(\@lines_R); # taken away by rm_comments()
 | |
|             @lines_R    = rm_comments( \@lines_R, $Lang_R, $file_R,
 | |
|                                        \%EOL_Continuation_re);
 | |
| 
 | |
|             my (@diff_LL, @diff_LR, );
 | |
|                    array_diff( $file_L                  ,   # in
 | |
|                        \@original_minus_blanks_L ,   # in
 | |
|                        \@lines_L                 ,   # in
 | |
|                        "comment"                 ,   # in
 | |
|                        \@diff_LL, \@diff_LR      ,   # out
 | |
|                        \@p_errors);                    # in/out
 | |
| 
 | |
|             my (@diff_RL, @diff_RR, );
 | |
|                     array_diff( $file_R                  ,   # in
 | |
|                        \@original_minus_blanks_R ,   # in
 | |
|                        \@lines_R                 ,   # in
 | |
|                        "comment"                 ,   # in
 | |
|                        \@diff_RL, \@diff_RR      ,   # out
 | |
|                        \@p_errors);                    # in/out
 | |
|             # each line of each file is now classified as
 | |
|             # code or comment
 | |
|             #use Data::Dumper;
 | |
|             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 | |
|             #print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
 | |
|             #die;
 | |
| 
 | |
|             # step 2: separate code from comments for L and R files
 | |
|             my @code_L = ();
 | |
|             my @code_R = ();
 | |
|             my @comm_L = ();
 | |
|             my @comm_R = ();
 | |
|             foreach my $line_info (@diff_LL) {
 | |
|                 if      ($line_info->{'type'} eq "code"   ) {
 | |
|                     push @code_L, $line_info->{char};
 | |
|                 } elsif ($line_info->{'type'} eq "comment") {
 | |
|                     push @comm_L, $line_info->{char};
 | |
|                 } else {
 | |
|                     die "Diff unexpected line type ",
 | |
|                         $line_info->{'type'}, "for $file_L line ",
 | |
|                         $line_info->{'lnum'};
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             foreach my $line_info (@diff_RL) {
 | |
|                 if      ($line_info->{type} eq "code"   ) {
 | |
|                     push @code_R, $line_info->{'char'};
 | |
|                 } elsif ($line_info->{type} eq "comment") {
 | |
|                     push @comm_R, $line_info->{'char'};
 | |
|                 } else {
 | |
|                     die "Diff unexpected line type ",
 | |
|                         $line_info->{'type'}, "for $file_R line ",
 | |
|                         $line_info->{'lnum'};
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             if ($opt_ignore_whitespace) {
 | |
|                 # strip all whitespace from each line of source code
 | |
|                 # and comments then use these stripped arrays in diffs
 | |
|                 foreach (@code_L) { s/\s+//g }
 | |
|                 foreach (@code_R) { s/\s+//g }
 | |
|                 foreach (@comm_L) { s/\s+//g }
 | |
|                 foreach (@comm_R) { s/\s+//g }
 | |
|             }
 | |
|             if ($opt_ignore_case) {
 | |
|                 # change all text to lowercase in diffs
 | |
|                 foreach (@code_L) { $_ = lc }
 | |
|                 foreach (@code_R) { $_ = lc }
 | |
|                 foreach (@comm_L) { $_ = lc }
 | |
|                 foreach (@comm_R) { $_ = lc }
 | |
|             }
 | |
|             # step 3: compute code diffs
 | |
|             array_diff("$file_L v. $file_R"   ,   # in
 | |
|                        \@code_L               ,   # in
 | |
|                        \@code_R               ,   # in
 | |
|                        "revision"             ,   # in
 | |
|                        \@diff_LL, \@diff_LR   ,   # out
 | |
|                        \@p_errors);                 # in/out
 | |
|             #print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
 | |
|             #print Dumper("diff_LR", \@diff_LR);
 | |
|             foreach my $line_info (@diff_LR) {
 | |
|                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 | |
|                 ++$p_dbl{$Lang_L}{'code'}{$status};
 | |
|                 if ($opt_by_file) {
 | |
|                     ++$p_dbf{$file_L}{'code'}{$status};
 | |
|                 }
 | |
|             }
 | |
|             #use Data::Dumper;
 | |
|             #print Dumper("code diffs:", \@diff_LL, \@diff_LR);
 | |
| 
 | |
|             # step 4: compute comment diffs
 | |
|             array_diff("$file_L v. $file_R"   ,   # in
 | |
|                        \@comm_L               ,   # in
 | |
|                        \@comm_R               ,   # in
 | |
|                        "revision"             ,   # in
 | |
|                        \@diff_LL, \@diff_LR   ,   # out
 | |
|                        \@Errors);                 # in/out
 | |
|             #print Dumper("comment diff_LR", \@diff_LR);
 | |
|             foreach my $line_info (@diff_LR) {
 | |
|                 my $status = $line_info->{'desc'}; # same|added|removed|modified
 | |
|                 ++$p_dbl{$Lang_L}{'comment'}{$status};
 | |
|                 if ($opt_by_file) {
 | |
|                     ++$p_dbf{$file_L}{'comment'}{$status};
 | |
|                 }
 | |
|             }
 | |
|             #print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
 | |
| 
 | |
|             # step 5: compute difference in blank lines (kind of pointless)
 | |
|             next if $Lang_L eq '(unknown)' or
 | |
|                     $Lang_R eq '(unknown)';
 | |
|             ($all_line_count_L,
 | |
|              $blank_count_L   ,
 | |
|              $comment_count_L ,
 | |
|             ) = call_counter($file_L, $Lang_L, \@Errors);
 | |
| 
 | |
|             ($all_line_count_R,
 | |
|              $blank_count_R   ,
 | |
|              $comment_count_R ,
 | |
|             ) = call_counter($file_R, $Lang_R, \@Errors);
 | |
|         } else {
 | |
|             # L and R file contents are identical, no need to diff
 | |
|             ($all_line_count_L,
 | |
|              $blank_count_L   ,
 | |
|              $comment_count_L ,
 | |
|             ) = call_counter($file_L, $Lang_L, \@Errors);
 | |
|             $all_line_count_R = $all_line_count_L;
 | |
|             $blank_count_R    = $blank_count_L   ;
 | |
|             $comment_count_R  = $comment_count_L ;
 | |
|             my $code_lines_R  = $all_line_count_R - ($blank_count_R + $comment_count_R);
 | |
|             $p_dbl{$Lang_L}{'blank'}{'same'}   += $blank_count_R;
 | |
|             $p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
 | |
|             $p_dbl{$Lang_L}{'code'}{'same'}    += $code_lines_R;
 | |
|             if ($opt_by_file) {
 | |
|                 $p_dbf{$file_L}{'blank'}{'same'}   += $blank_count_R;
 | |
|                 $p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
 | |
|                 $p_dbf{$file_L}{'code'}{'same'}    += $code_lines_R;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if ($blank_count_L <  $blank_count_R) {
 | |
|             my $D = $blank_count_R - $blank_count_L;
 | |
|             $p_dbl{$Lang_L}{'blank'}{'added'}   += $D;
 | |
|         } else {
 | |
|             my $D = $blank_count_L - $blank_count_R;
 | |
|             $p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
 | |
|         }
 | |
|         if ($opt_by_file) {
 | |
|             if ($blank_count_L <  $blank_count_R) {
 | |
|                 my $D = $blank_count_R - $blank_count_L;
 | |
|                 $p_dbf{$file_L}{'blank'}{'added'}   += $D;
 | |
|             } else {
 | |
|                 my $D = $blank_count_L - $blank_count_R;
 | |
|                 $p_dbf{$file_L}{'blank'}{'removed'} += $D;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
 | |
|         if ($opt_by_file) {
 | |
|             $p_rbf{$file_L}{'code'   } = $code_count_L    ;
 | |
|             $p_rbf{$file_L}{'blank'  } = $blank_count_L   ;
 | |
|             $p_rbf{$file_L}{'comment'} = $comment_count_L ;
 | |
|             $p_rbf{$file_L}{'lang'   } = $Lang_L          ;
 | |
|             $p_rbf{$file_L}{'nFiles' } = 1                ;
 | |
|         } else {
 | |
|             $p_rbf{$file_L} = 1;  # just keep track of counted files
 | |
|         }
 | |
| 
 | |
|         $p_rbl{$Lang_L}{'nFiles'}++;
 | |
|         $p_rbl{$Lang_L}{'code'}    += $code_count_L   ;
 | |
|         $p_rbl{$Lang_L}{'blank'}   += $blank_count_L  ;
 | |
|         $p_rbl{$Lang_L}{'comment'} += $comment_count_L;
 | |
|     }
 | |
| 
 | |
|     print "<- count_filesets()\n" if $opt_v > 2;
 | |
|     return {
 | |
|         "ignored" => \%p_ignored,
 | |
|         "errors"  => \@p_errors,
 | |
|         "results_by_file" => \%p_rbf,
 | |
|         "results_by_language" => \%p_rbl,
 | |
|         "delta_by_file" => \%p_dbf,
 | |
|         "delta_by_language" => \%p_dbl,
 | |
|         "alignment" => \%p_alignment,
 | |
|         "n_filepairs_compared" => $n_file_pairs_compared
 | |
|     }
 | |
| } # 1}}}
 | |
| sub write_alignment_data {                   # {{{1
 | |
|     my ($filename, $n_filepairs_compared, $data ) = @_;
 | |
|     my @output = ();
 | |
|     if ( $data->{'added'} ) {
 | |
|         my %added_lines = %{$data->{'added'}};
 | |
|         push (@output, "Files added: " . (scalar keys %added_lines) . "\n");
 | |
|         foreach my $line ( sort keys %added_lines ) {
 | |
|             push (@output, $line);
 | |
|         }
 | |
|         push (@output, "\n" );
 | |
|     }
 | |
|     if ( $data->{'removed'} ) {
 | |
|         my %removed_lines = %{$data->{'removed'}};
 | |
|         push (@output, "Files removed: " . (scalar keys %removed_lines) . "\n");
 | |
|         foreach my $line ( sort keys %removed_lines ) {
 | |
|             push (@output, $line);
 | |
|         }
 | |
|         push (@output, "\n");
 | |
|     }
 | |
|     if ( $data->{'pairs'} ) {
 | |
|         my %pairs = %{$data->{'pairs'}};
 | |
|         push (@output, "File pairs compared: " . $n_filepairs_compared . "\n");
 | |
|         foreach my $pair ( sort keys %pairs ) {
 | |
|             push (@output, $pair);
 | |
|         }
 | |
|     }
 | |
|     write_file($filename, {}, @output);
 | |
| } # 1}}}
 | |
| sub exclude_dir_validates {                  # {{{1
 | |
|     my ($rh_Exclude_Dir) = @_;
 | |
|     my $is_OK = 1;
 | |
|     foreach my $dir (keys %{$rh_Exclude_Dir}) {
 | |
|         if (($ON_WINDOWS and $dir =~ m{\\}) or ($dir =~ m{/})) {
 | |
|             $is_OK = 0;
 | |
|             warn "--exclude-dir '$dir' :  cannot specify directory paths\n";
 | |
|         }
 | |
|     }
 | |
|     if (!$is_OK) {
 | |
|         warn "Use '--fullpath --not-match-d=REGEX' instead\n";
 | |
|     }
 | |
|     return $is_OK;
 | |
| } # 1}}}
 | |
| sub process_exclude_list_file {              # {{{1
 | |
|     my ($list_file      , # in
 | |
|         $rh_exclude_dir , # out
 | |
|         $rh_ignored     , # out
 | |
|        ) = @_;
 | |
|     # note: references global @file_list
 | |
|     print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
 | |
|     # reject a specific set of files and/or directories
 | |
|     my @reject_list   = ($list_file); # don't count the exclude list file itself
 | |
|     push @reject_list, read_list_file($list_file);
 | |
|     my @file_reject_list = ();
 | |
|     foreach my $F_or_D (@reject_list) {
 | |
|         if (is_dir($F_or_D)) {
 | |
|             $rh_exclude_dir->{$F_or_D} = 1;
 | |
|         } elsif (is_file($F_or_D)) {
 | |
|             push @file_reject_list, $F_or_D;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Normalize file names for better comparison.
 | |
|     my %normalized_input   = normalize_file_names(@file_list);
 | |
|     my %normalized_reject  = normalize_file_names(@file_reject_list);
 | |
|     my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
 | |
|     foreach my $F (keys %normalized_input) {
 | |
|         if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
 | |
|             my $orig_F = $normalized_input{$F};
 | |
|             $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
 | |
|             print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
 | |
|                 if $opt_v > 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- process_exclude_list_file\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub combine_results {                        # {{{1
 | |
|     # returns 1 if the inputs are categorized by language
 | |
|     #         0 if no identifiable language was found
 | |
|     my ($ra_report_files, # in
 | |
|         $report_type    , # in  "by language" or "by report file"
 | |
|         $rhh_count      , # out count{TYPE}{nFiles|code|blank|comment|scaled}
 | |
|         $rhaa_Filters_by_Language , # in
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
 | |
|     my $found_language = 0;
 | |
| 
 | |
|     foreach my $file (@{$ra_report_files}) {
 | |
|         my $n_results_found = 0;
 | |
|         my $IN = open_file('<', $file, 1);
 | |
|         if (!defined $IN) {
 | |
|             warn "Unable to read $file; ignoring.\n";
 | |
|             next;
 | |
|         }
 | |
|         while (<$IN>) {
 | |
|             next if /^(http|Language|SUM|-----)/;
 | |
|             if (!$opt_by_file  and
 | |
|                 m{^(.*?)\s+         # language
 | |
|                    (\d+)\s+         # files
 | |
|                    (\d+)\s+         # blank
 | |
|                    (\d+)\s+         # comments
 | |
|                    (\d+)\s+         # code
 | |
|                    (                #    next four entries missing with -no3
 | |
|                    x\s+             # x
 | |
|                    \d+\.\d+\s+      # scale
 | |
|                    =\s+             # =
 | |
|                    (\d+\.\d+)\s*    # scaled code
 | |
|                    )?
 | |
|                    $}x) {
 | |
|                 if ($report_type eq "by language") {
 | |
|                     if (!defined $rhaa_Filters_by_Language->{$1}) {
 | |
|                         warn "Unrecognized language '$1' in $file ignored\n";
 | |
|                         next;
 | |
|                     }
 | |
|                     # above test necessary to avoid trying to sum reports
 | |
|                     # of reports (which have no language breakdown).
 | |
|                     $found_language = 1;
 | |
|                     $rhh_count->{$1   }{'nFiles' } += $2;
 | |
|                     $rhh_count->{$1   }{'blank'  } += $3;
 | |
|                     $rhh_count->{$1   }{'comment'} += $4;
 | |
|                     $rhh_count->{$1   }{'code'   } += $5;
 | |
|                     $rhh_count->{$1   }{'scaled' } += $7 if $opt_3;
 | |
|                 } else {
 | |
|                     $rhh_count->{$file}{'nFiles' } += $2;
 | |
|                     $rhh_count->{$file}{'blank'  } += $3;
 | |
|                     $rhh_count->{$file}{'comment'} += $4;
 | |
|                     $rhh_count->{$file}{'code'   } += $5;
 | |
|                     $rhh_count->{$file}{'scaled' } += $7 if $opt_3;
 | |
|                 }
 | |
|                 ++$n_results_found;
 | |
|             } elsif ($opt_by_file  and
 | |
|                 m{^(.*?)\s+         # language
 | |
|                    (\d+)\s+         # blank
 | |
|                    (\d+)\s+         # comments
 | |
|                    (\d+)\s+         # code
 | |
|                    (                #    next four entries missing with -no3
 | |
|                    x\s+             # x
 | |
|                    \d+\.\d+\s+      # scale
 | |
|                    =\s+             # =
 | |
|                    (\d+\.\d+)\s*    # scaled code
 | |
|                    )?
 | |
|                    $}x) {
 | |
|                 if ($report_type eq "by language") {
 | |
|                     next unless %{$rhaa_Filters_by_Language->{$1}};
 | |
|                     # above test necessary to avoid trying to sum reports
 | |
|                     # of reports (which have no language breakdown).
 | |
|                     $found_language = 1;
 | |
|                     $rhh_count->{$1   }{'nFiles' } +=  1;
 | |
|                     $rhh_count->{$1   }{'blank'  } += $2;
 | |
|                     $rhh_count->{$1   }{'comment'} += $3;
 | |
|                     $rhh_count->{$1   }{'code'   } += $4;
 | |
|                     $rhh_count->{$1   }{'scaled' } += $6 if $opt_3;
 | |
|                 } else {
 | |
|                     $rhh_count->{$file}{'nFiles' } +=  1;
 | |
|                     $rhh_count->{$file}{'blank'  } += $2;
 | |
|                     $rhh_count->{$file}{'comment'} += $3;
 | |
|                     $rhh_count->{$file}{'code'   } += $4;
 | |
|                     $rhh_count->{$file}{'scaled' } += $6 if $opt_3;
 | |
|                 }
 | |
|                 ++$n_results_found;
 | |
|             }
 | |
|         }
 | |
|         warn "No counts found in $file--is the file format correct?\n"
 | |
|             unless $n_results_found;
 | |
|     }
 | |
|     print "<- combine_results\n" if $opt_v > 2;
 | |
|     return $found_language;
 | |
| } # 1}}}
 | |
| sub compute_denominator {                    # {{{1
 | |
|     my ($method, $nCode, $nComment, $nBlank, ) = @_;
 | |
|     print "-> compute_denominator\n" if $opt_v > 2;
 | |
|     my %den        = ( "c" => $nCode );
 | |
|        $den{"cm"}  = $den{"c"}  + $nComment;
 | |
|        $den{"cmb"} = $den{"cm"} + $nBlank;
 | |
|        $den{"cb"}  = $den{"c"}  + $nBlank;
 | |
| 
 | |
|     print "<- compute_denominator\n" if $opt_v > 2;
 | |
|     return $den{ $method };
 | |
| } # 1}}}
 | |
| sub yaml_to_json_separators {                # {{{1
 | |
|     # YAML and JSON are closely related.  Their differences can be captured
 | |
|     # by trailing commas ($C), braces ($open_B, $close_B), and
 | |
|     # quotes around text ($Q).
 | |
|     print "-> yaml_to_json_separators()\n" if $opt_v > 2;
 | |
|     my ($Q, $open_B, $close_B, $start, $C);
 | |
|     if ($opt_json) {
 | |
|        $C       = ',';
 | |
|        $Q       = '"';
 | |
|        $open_B  = '{';
 | |
|        $close_B = '}';
 | |
|        $start   = '{';
 | |
|     } else {
 | |
|        $C       = '';
 | |
|        $Q       = '' ;
 | |
|        $open_B  = '' ;
 | |
|        $close_B = '';
 | |
|        $start   = "---\n# $URL\n";
 | |
|     }
 | |
|     print "<- yaml_to_json_separators()\n" if $opt_v > 2;
 | |
|     return ($Q, $open_B, $close_B, $start, $C);
 | |
| } # 1}}}
 | |
| sub diff_report     {                        # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     print "-> diff_report\n" if $opt_v > 2;
 | |
| 
 | |
|     if ($opt_xml) {
 | |
|         print "<- diff_report\n" if $opt_v > 2;
 | |
|         return diff_xml_report(@_)
 | |
|     } elsif ($opt_yaml) {
 | |
|         print "<- diff_report\n" if $opt_v > 2;
 | |
|         return diff_yaml_report(@_)
 | |
|     } elsif ($opt_json) {
 | |
|         print "<- diff_report\n" if $opt_v > 2;
 | |
|         return diff_json_report(@_)
 | |
|     } elsif ($opt_csv or $opt_md) {
 | |
|         print "<- diff_report\n" if $opt_v > 2;
 | |
|         return diff_csv_report(@_)
 | |
|     }
 | |
| 
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
|     my %orig_case = ();
 | |
|     if ($ON_WINDOWS and $report_type eq "by file") {
 | |
|         # restore the original upper/lowercase version of the file name
 | |
|         foreach my $lc_file (sort keys %{$rhhh_count}) {
 | |
|           foreach my $cat (sort keys %{$rhhh_count->{$lc_file}}) {
 | |
|             foreach my $S (qw(added same modified removed)) {
 | |
|                 $orig_case{ $upper_lower_map{$lc_file} }{$cat}{$S} =
 | |
|                            $rhhh_count->{$lc_file}{$cat}{$S};
 | |
|             }
 | |
|           }
 | |
|         }
 | |
|         $rhhh_count = \%orig_case;
 | |
|     }
 | |
| 
 | |
| #use Data::Dumper;
 | |
| #print "diff_report: ", Dumper($rhhh_count), "\n";
 | |
|     my @results       = ();
 | |
| 
 | |
|     my $languages     = ();
 | |
|     my %sum           = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
 | |
|     my $max_len       = 0;
 | |
|     foreach my $language (keys %{$rhhh_count}) {
 | |
|         foreach my $V (qw(nFiles blank comment code)) {
 | |
|             foreach my $S (qw(added same modified removed)) {
 | |
|                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 | |
|                     defined $rhhh_count->{$language}{$V}{$S};
 | |
|                 $sum{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 | |
|             }
 | |
|         }
 | |
|         $max_len      = length($language) if length($language) > $max_len;
 | |
|     }
 | |
|     my $column_1_offset = 0;
 | |
|        $column_1_offset = $max_len - 17 if $max_len > 17;
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
| 
 | |
|     my $spacing_0 = 23;
 | |
|     my $spacing_1 = 13;
 | |
|     my $spacing_2 =  9;
 | |
|     my $spacing_3 = 17;
 | |
|     if (!$opt_3) {
 | |
|         $spacing_1 = 19;
 | |
|         $spacing_2 = 14;
 | |
|         $spacing_3 = 27;
 | |
|     }
 | |
|     $spacing_0 += $column_1_offset;
 | |
|     $spacing_1 += $column_1_offset;
 | |
|     $spacing_3 += $column_1_offset;
 | |
|     my %Format = (
 | |
|         '1' => { 'xml' => 'name="%s" ',
 | |
|                  'txt' => "\%-${spacing_0}s ",
 | |
|                },
 | |
|         '2' => { 'xml' => 'name="%s" ',
 | |
|                  'txt' => "\%-${spacing_3}s ",
 | |
|                },
 | |
|         '3' => { 'xml' => 'files_count="%d" ',
 | |
|                  'txt' => '%6d ',
 | |
|                },
 | |
|         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 | |
|                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 | |
|                },
 | |
|         '5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
 | |
|                  'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
 | |
|                },
 | |
|         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 | |
|                  'txt' => ' x %6.2f = %14.2f',
 | |
|                },
 | |
|     );
 | |
|     my $Style = "txt";
 | |
|        $Style = "xml" if $opt_xml ;
 | |
|        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 | |
|        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 | |
|        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 | |
| 
 | |
|     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 | |
|        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 | |
|             if (!$opt_3) and (68 + $column_1_offset) > 79;
 | |
|     my $data_line  = "";
 | |
|     my $first_column;
 | |
|     my $BY_LANGUAGE = 0;
 | |
|     my $BY_FILE     = 0;
 | |
|     if      ($report_type eq "by language") {
 | |
|         $first_column = "Language";
 | |
|         $BY_LANGUAGE  = 1;
 | |
|     } elsif ($report_type eq "by file")     {
 | |
|         $first_column = "File";
 | |
|         $BY_FILE      = 1;
 | |
|     } else {
 | |
|         $first_column = "Report File";
 | |
|     }
 | |
| 
 | |
|     # column headers
 | |
|     if (!$opt_3 and $BY_FILE) {
 | |
|         my $spacing_n = $spacing_1 - 11;
 | |
|         $data_line  = sprintf "%-${spacing_n}s" , $first_column;
 | |
|     } else {
 | |
|         $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 | |
|     }
 | |
|     if ($BY_FILE) {
 | |
|         $data_line .= sprintf "%${spacing_2}s"   , ""     ;
 | |
|     } else {
 | |
|         $data_line .= sprintf "%${spacing_2}s "  , "files";
 | |
|     }
 | |
|     my $PCT_symbol = "";
 | |
|        $PCT_symbol = " \%" if $opt_by_percent;
 | |
|     $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | |
|         "blank${PCT_symbol}"         ,
 | |
|         "comment${PCT_symbol}"       ,
 | |
|         "code";
 | |
| 
 | |
|     if ($Style eq "txt") {
 | |
|         push @results, $data_line;
 | |
|         push @results, $hyphen_line;
 | |
|     }
 | |
| 
 | |
|     # sort diff output in descending order of cumulative entries
 | |
|     foreach my $lang_or_file (sort {
 | |
|                                 ($rhhh_count->{$b}{'code'}{'added'}    +
 | |
|                                  $rhhh_count->{$b}{'code'}{'same'}     +
 | |
|                                  $rhhh_count->{$b}{'code'}{'modified'} +
 | |
|                                  $rhhh_count->{$b}{'code'}{'removed'}  )  <=>
 | |
|                                 ($rhhh_count->{$a}{'code'}{'added'}    +
 | |
|                                  $rhhh_count->{$a}{'code'}{'same'}     +
 | |
|                                  $rhhh_count->{$a}{'code'}{'modified'} +
 | |
|                                  $rhhh_count->{$a}{'code'}{'removed'})
 | |
|                               or $a cmp $b }
 | |
|                                     keys %{$rhhh_count}) {
 | |
| 
 | |
|         if ($BY_FILE) {
 | |
|             push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | |
|         } else {
 | |
|             push @results, $lang_or_file;
 | |
|         }
 | |
|         foreach my $S (qw(same modified added removed)) {
 | |
|             my $indent = $spacing_1 - 2;
 | |
|             my $line .= sprintf " %-${indent}s", $S;
 | |
|             if ($BY_FILE) {
 | |
|                 $line .= sprintf "   ";
 | |
|             } else {
 | |
|                 $line .= sprintf "  %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 | |
|             }
 | |
|             if ($opt_by_percent) {
 | |
|                 my $DEN = compute_denominator($opt_by_percent  ,
 | |
|                     $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 | |
|                     $rhhh_count->{$lang_or_file}{'comment'}{$S},
 | |
|                     $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 | |
|                 if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 | |
|                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | |
|                         $rhhh_count->{$lang_or_file}{'blank'}{$S}   / $DEN * 100,
 | |
|                         $rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
 | |
|                         $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | |
|                 } else {
 | |
|                     $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | |
|                         0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | |
|                 }
 | |
|             } else {
 | |
|                 $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | |
|                     $rhhh_count->{$lang_or_file}{'blank'}{$S}   ,
 | |
|                     $rhhh_count->{$lang_or_file}{'comment'}{$S} ,
 | |
|                     $rhhh_count->{$lang_or_file}{'code'}{$S}    ;
 | |
|             }
 | |
|             push @results, $line;
 | |
|         }
 | |
|     }
 | |
|     push @results, $hyphen_line;
 | |
|     push @results, "SUM:";
 | |
|     my $sum_files    = 0;
 | |
|     my $sum_lines    = 0;
 | |
|     foreach my $S (qw(same modified added removed)) {
 | |
|         my $indent = $spacing_1 - 2;
 | |
|         my $line .= sprintf " %-${indent}s", $S;
 | |
|             if ($BY_FILE) {
 | |
|                 $line .= sprintf "   ";
 | |
|                 $sum_files += 1;
 | |
|             } else {
 | |
|                 $line .= sprintf "  %${spacing_2}s", $sum{'nFiles'}{$S};
 | |
|                 $sum_files += $sum{'nFiles'}{$S};
 | |
|             }
 | |
|         if ($opt_by_percent) {
 | |
|             my $DEN = compute_denominator($opt_by_percent,
 | |
|                 $sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
 | |
|             if ($sum{'code'}{$S} > 0) {
 | |
|                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | |
|                     $sum{'blank'}{$S}   / $DEN * 100,
 | |
|                     $sum{'comment'}{$S} / $DEN * 100,
 | |
|                     $sum{'code'}{$S}    ;
 | |
|             } else {
 | |
|                 $line .= sprintf " %14.2f %14.2f %${spacing_2}s",
 | |
|                     0.0, 0.0, $sum{'code'}{$S}    ;
 | |
|             }
 | |
|         } else {
 | |
|             $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | |
|                 $sum{'blank'}{$S}   ,
 | |
|                 $sum{'comment'}{$S} ,
 | |
|                 $sum{'code'}{$S}    ;
 | |
|         }
 | |
|         $sum_lines += $sum{'blank'}{$S} + $sum{'comment'}{$S} + $sum{'code'}{$S};
 | |
|         push @results, $line;
 | |
|     }
 | |
| 
 | |
|     my $header_line  = sprintf "%s v %s", $URL, $version;
 | |
|        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 | |
|                         $elapsed_sec           ,
 | |
|                         $sum_files/$elapsed_sec,
 | |
|                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 | |
|     if ($Style eq "txt") {
 | |
|         unshift @results, output_header($header_line, $hyphen_line, $BY_FILE);
 | |
|     }
 | |
| 
 | |
|     push @results, $hyphen_line;
 | |
|     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | |
|     print "<- diff_report\n" if $opt_v > 2;
 | |
| 
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub xml_yaml_or_json_header {                # {{{1
 | |
|     my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
 | |
|     print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
 | |
|     my $header      = "";
 | |
|     my $file_rate   = $sum_files/$elapsed_sec;
 | |
|     my $line_rate   = $sum_lines/$elapsed_sec;
 | |
|     my $type        = "";
 | |
|        $type        = "diff_" if $opt_diff;
 | |
|     my $report_file = "";
 | |
|     if ($opt_report_file) {
 | |
|         my $Fname = $opt_report_file;
 | |
|         $Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
 | |
|         if ($opt_sum_reports) {
 | |
|             if ($by_file) {
 | |
|                 $report_file = "  <report_file>$Fname.file</report_file>"
 | |
|             } else {
 | |
|                 $report_file = "  <report_file>$Fname.lang</report_file>"
 | |
|             }
 | |
|         } else {
 | |
|             $report_file = "  <report_file>$Fname</report_file>"
 | |
|         }
 | |
|     }
 | |
|     if ($opt_xml) {
 | |
|         $header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 | |
|         $header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
 | |
|         if ($opt_hide_rate) {
 | |
|             $header .= "<${type}results>
 | |
| <header>
 | |
|   <cloc_url>$URL</cloc_url>
 | |
|   <cloc_version>$version</cloc_version>
 | |
|   <n_files>$sum_files</n_files>
 | |
|   <n_lines>$sum_lines</n_lines>";
 | |
|         } else {
 | |
|             $header .= "<${type}results>
 | |
| <header>
 | |
|   <cloc_url>$URL</cloc_url>
 | |
|   <cloc_version>$version</cloc_version>
 | |
|   <elapsed_seconds>$elapsed_sec</elapsed_seconds>
 | |
|   <n_files>$sum_files</n_files>
 | |
|   <n_lines>$sum_lines</n_lines>
 | |
|   <files_per_second>$file_rate</files_per_second>
 | |
|   <lines_per_second>$line_rate</lines_per_second>";
 | |
|         }
 | |
|         $header .= "\n$report_file"
 | |
|             if $opt_report_file;
 | |
|         $header .= "\n</header>";
 | |
|         if (%git_metadata) {
 | |
|             foreach my $target (keys %git_metadata) {
 | |
|                 $header .= "\n<source>";
 | |
|                 $header .= "\n  <target>$target</target>";
 | |
|                 $header .= "\n  <origin>$git_metadata{$target}{'origin'}</origin>";
 | |
|                 $header .= "\n  <branch>$git_metadata{$target}{'branch'}</branch>";
 | |
|                 $header .= "\n  <commit>$git_metadata{$target}{'commit'}</commit>";
 | |
|                 $header .= "\n</source>";
 | |
|             }
 | |
|         }
 | |
|     } elsif ($opt_yaml or $opt_json) {
 | |
|         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | |
|         if ($opt_hide_rate) {
 | |
|             $header = "${start}${Q}header${Q} : $open_B
 | |
|   ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 | |
|   ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 | |
|   ${Q}n_files${Q}            : $sum_files${C}
 | |
|   ${Q}n_lines${Q}            : $sum_lines";
 | |
|         } else {
 | |
|             $header = "${start}${Q}header${Q} : $open_B
 | |
|   ${Q}cloc_url${Q}           : ${Q}$URL${Q}${C}
 | |
|   ${Q}cloc_version${Q}       : ${Q}$version${Q}${C}
 | |
|   ${Q}elapsed_seconds${Q}    : $elapsed_sec${C}
 | |
|   ${Q}n_files${Q}            : $sum_files${C}
 | |
|   ${Q}n_lines${Q}            : $sum_lines${C}
 | |
|   ${Q}files_per_second${Q}   : $file_rate${C}
 | |
|   ${Q}lines_per_second${Q}   : $line_rate";
 | |
|         }
 | |
|         if ($opt_report_file) {
 | |
|             my $Fname = $opt_report_file;
 | |
|             $Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
 | |
|             if ($opt_sum_reports) {
 | |
|                 if ($by_file) {
 | |
|                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname.file${Q}"
 | |
|                 } else {
 | |
|                     $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname.lang${Q}"
 | |
|                 }
 | |
|             } else {
 | |
|                 $header .= "$C\n  ${Q}report_file${Q}        : ${Q}$Fname${Q}";
 | |
|             }
 | |
|         }
 | |
|         $header .= "${close_B}${C}";
 | |
|     }
 | |
|     print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
 | |
|     return $header;
 | |
| } # 1}}}
 | |
| sub diff_yaml_report {                       # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
|     print "-> diff_yaml_report\n" if $opt_v > 2;
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
|     my @results       = ();
 | |
|     my %sum           = ();
 | |
|     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | |
|         diff_header_sum($report_type, $rhhh_count, \%sum);
 | |
| 
 | |
|     if (!$ALREADY_SHOWED_HEADER) {
 | |
|         push @results,
 | |
|               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | |
|                                  $sum_files, $sum_lines, $BY_FILE);
 | |
|         $ALREADY_SHOWED_HEADER = 1;
 | |
|     }
 | |
|     foreach my $S (qw(added same modified removed)) {
 | |
|         push @results, "$S :";
 | |
|         foreach my $F_or_L (keys %{$rhhh_count}) {
 | |
|             # force quoted language or filename in case these
 | |
|             # have embedded funny characters, issue #312
 | |
|             push @results, "  '" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "' :";
 | |
|             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 | |
|                 next if $k eq "lang"; # present only in those cases
 | |
|                                       # where code exists for action $S
 | |
|                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 | |
|                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 | |
|                 push @results,
 | |
|                     "    $k : $rhhh_count->{$F_or_L}{$k}{$S}";
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     push @results, "SUM :";
 | |
|     foreach my $S (qw(added same modified removed)) {
 | |
|         push @results, "  $S :";
 | |
|         foreach my $topic (keys %sum) {
 | |
|             push @results, "    $topic : $sum{$topic}{$S}";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- diff_yaml_report\n" if $opt_v > 2;
 | |
| 
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub diff_json_report {                       # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
|     print "-> diff_json_report\n" if $opt_v > 2;
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
|     my @results       = ();
 | |
|     my %sum           = ();
 | |
|     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | |
|         diff_header_sum($report_type, $rhhh_count, \%sum);
 | |
| 
 | |
|     if (!$ALREADY_SHOWED_HEADER) {
 | |
|         push @results,
 | |
|               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | |
|                                  $sum_files, $sum_lines, $BY_FILE);
 | |
|         $ALREADY_SHOWED_HEADER = 1;
 | |
|     }
 | |
|     foreach my $S (qw(added same modified removed)) {
 | |
|         push @results, " \"$S\" : {";
 | |
|         foreach my $F_or_L (keys %{$rhhh_count}) {
 | |
|             push @results, "  \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
 | |
|             foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
 | |
|                 next if $k eq "lang"; # present only in those cases
 | |
|                                       # where code exists for action $S
 | |
|                 $rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
 | |
|                     defined $rhhh_count->{$F_or_L}{$k}{$S};
 | |
|                 push @results,
 | |
|                     "    \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
 | |
|             }
 | |
|             $results[-1] =~ s/,\s*$//;
 | |
|             push @results, "  },"
 | |
|         }
 | |
|         $results[-1] =~ s/,\s*$//;
 | |
|         push @results, "  },"
 | |
|     }
 | |
| 
 | |
|     push @results, "  \"SUM\" : {";
 | |
|     foreach my $S (qw(added same modified removed)) {
 | |
|         push @results, "  \"$S\" : {";
 | |
|         foreach my $topic (keys %sum) {
 | |
|             push @results, "    \"$topic\" : $sum{$topic}{$S},";
 | |
|         }
 | |
|         $results[-1] =~ s/,\s*$//;
 | |
|         push @results, "},";
 | |
|     }
 | |
| 
 | |
|     $results[-1] =~ s/,\s*$//;
 | |
|     push @results, "} }";
 | |
|     print "<- diff_json_report\n" if $opt_v > 2;
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub diff_header_sum {                        # {{{1
 | |
|     my ($report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rhh_sum    , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
 | |
|        ) = @_;
 | |
| 
 | |
|     my $sum_files = 0;
 | |
|     my $sum_lines = 0;
 | |
|     foreach my $language (keys %{$rhhh_count}) {
 | |
|         foreach my $V (qw(nFiles blank comment code)) {
 | |
|             foreach my $S (qw(added same modified removed)) {
 | |
|                 $rhhh_count->{$language}{$V}{$S} = 0 unless
 | |
|                     defined $rhhh_count->{$language}{$V}{$S};
 | |
|                 $rhh_sum->{$V}{$S}  += $rhhh_count->{$language}{$V}{$S};
 | |
|                 if ($V eq "nFiles") {
 | |
|                     $sum_files += $rhhh_count->{$language}{$V}{$S};
 | |
|                 } else {
 | |
|                     $sum_lines += $rhhh_count->{$language}{$V}{$S};
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $BY_LANGUAGE = 0;
 | |
|     my $BY_FILE     = 0;
 | |
|     if      ($report_type eq "by language") {
 | |
|         $BY_LANGUAGE  = 1;
 | |
|     } elsif ($report_type eq "by file")     {
 | |
|         $BY_FILE      = 1;
 | |
|     }
 | |
|     return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
 | |
| } # 1}}}
 | |
| sub diff_xml_report {                        # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
|     print "-> diff_xml_report\n" if $opt_v > 2;
 | |
|     my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | |
| 
 | |
| #print "diff_report: ", Dumper($rhhh_count), "\n";
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
|     my @results       = ();
 | |
|     my %sum           = ();
 | |
|     my $languages     = ();
 | |
| 
 | |
|     my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
 | |
|         diff_header_sum($report_type, $rhhh_count, \%sum);
 | |
| 
 | |
|     my $data_line   = "";
 | |
| 
 | |
|     if (!$ALREADY_SHOWED_HEADER) {
 | |
|         push @results,
 | |
|               xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | |
|                                  $sum_files, $sum_lines, $BY_FILE);
 | |
|         $ALREADY_SHOWED_HEADER = 1;
 | |
|     }
 | |
| 
 | |
|     foreach my $S (qw(same modified added removed)) {
 | |
|         push @results, "  <$S>";
 | |
|         foreach my $lang_or_file (sort {
 | |
|                                      $rhhh_count->{$b}{'code'} <=>
 | |
|                                      $rhhh_count->{$a}{'code'}
 | |
|                                    }
 | |
|                               keys %{$rhhh_count}) {
 | |
|             my $L = "";
 | |
| 
 | |
|             if ($BY_FILE) {
 | |
|                 $L .= sprintf "    <file name=\"%s\" files_count=\"1\" ",
 | |
|                     xml_metachars(
 | |
|                         rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
 | |
|             } else {
 | |
|                 $L .= sprintf "    <language name=\"%s\" files_count=\"%d\" ",
 | |
|                         $lang_or_file ,
 | |
|                         $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
 | |
|             }
 | |
|             if ($opt_by_percent) {
 | |
|               my $DEN = compute_denominator($opt_by_percent            ,
 | |
|                             $rhhh_count->{$lang_or_file}{'code'}{$S}   ,
 | |
|                             $rhhh_count->{$lang_or_file}{'comment'}{$S},
 | |
|                             $rhhh_count->{$lang_or_file}{'blank'}{$S}  );
 | |
|               foreach my $T (qw(blank comment)) {
 | |
|                   if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
 | |
|                     $L .= sprintf "%s=\"%.2f\" ",
 | |
|                             $T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
 | |
|                   } else {
 | |
|                     $L .= sprintf "%s=\"0.0\" ", $T;
 | |
|                   }
 | |
|               }
 | |
|               foreach my $T (qw(code)) {
 | |
|                   $L .= sprintf "%s=\"%d\" ",
 | |
|                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 | |
|               }
 | |
|             } else {
 | |
|               foreach my $T (qw(blank comment code)) {
 | |
|                   $L .= sprintf "%s=\"%d\" ",
 | |
|                           $T, $rhhh_count->{$lang_or_file}{$T}{$S};
 | |
|               }
 | |
|             }
 | |
|             push @results, $L . "/>";
 | |
|         }
 | |
| 
 | |
| 
 | |
|         my $L = sprintf "    <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
 | |
|         if ($opt_by_percent) {
 | |
|           my $DEN = compute_denominator($opt_by_percent,
 | |
|                         $sum{'code'}{$S}   ,
 | |
|                         $sum{'comment'}{$S},
 | |
|                         $sum{'blank'}{$S}  );
 | |
|           foreach my $V (qw(blank comment)) {
 | |
|               if ($sum{'code'}{$S} > 0) {
 | |
|                   $L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
 | |
|               } else {
 | |
|                   $L .= sprintf "%s=\"0.0\" ", $V;
 | |
|               }
 | |
|           }
 | |
|           foreach my $V (qw(code)) {
 | |
|               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 | |
|           }
 | |
|         } else {
 | |
|           foreach my $V (qw(blank comment code)) {
 | |
|               $L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
 | |
|           }
 | |
|         }
 | |
|         push @results, $L . "/>";
 | |
|         push @results, "  </$S>";
 | |
|     }
 | |
| 
 | |
|     push @results, "</diff_results>";
 | |
|     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | |
|     print "<- diff_xml_report\n" if $opt_v > 2;
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub diff_csv_report {                        # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhhh_count , # in  count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
 | |
|         $rh_scale   , # in  unused
 | |
|        ) = @_;
 | |
|     print "-> diff_csv_report\n" if $opt_v > 2;
 | |
| 
 | |
|     my @results       = ();
 | |
|     my $languages     = ();
 | |
| 
 | |
|     my $data_line   = "";
 | |
|     my $BY_LANGUAGE = 0;
 | |
|     my $BY_FILE     = 0;
 | |
|     if      ($report_type eq "by language") {
 | |
|         $BY_LANGUAGE  = 1;
 | |
|     } elsif ($report_type eq "by file")     {
 | |
|         $BY_FILE      = 1;
 | |
|     }
 | |
|     my $DELIM = ",";
 | |
|        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 | |
|        $DELIM = "|" if defined $opt_md;
 | |
| 
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
| 
 | |
|     my $line = "Language${DELIM} ";
 | |
|        $line = "File${DELIM} " if $BY_FILE;
 | |
|     foreach my $item (qw(files blank comment code)) {
 | |
|         next if $BY_FILE and $item eq 'files';
 | |
|         foreach my $symbol ( '==', '!=', '+', '-', ) {
 | |
|             $line .= "$symbol $item${DELIM} ";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $T_elapsed_sec = "T=$elapsed_sec s";
 | |
|        $T_elapsed_sec = "" if $opt_hide_rate;
 | |
| 
 | |
|     if ($opt_md) {
 | |
|         push @results, "cloc|$URL v $version $T_elapsed_sec";
 | |
|         push @results, "--- | ---";
 | |
|         push @results, "";
 | |
|         push @results, $line;
 | |
|         my @col_header  = ();
 | |
|         push @col_header, ":-------";
 | |
|         foreach (1..16) {
 | |
|             push @col_header, "-------:";
 | |
|         }
 | |
|         push @results, join("|", @col_header) . "|";
 | |
|     } else {
 | |
|         $line .= "\"$URL v $version $T_elapsed_sec\"";
 | |
|         push @results, $line;
 | |
|     }
 | |
| 
 | |
|     foreach my $lang_or_file (keys %{$rhhh_count}) {
 | |
|         $rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
 | |
|             defined $rhhh_count->{$lang_or_file}{'code'};
 | |
|     }
 | |
|     foreach my $lang_or_file (sort {
 | |
|                                  $rhhh_count->{$b}{'code'} <=>
 | |
|                                  $rhhh_count->{$a}{'code'}
 | |
|                                }
 | |
|                           keys %{$rhhh_count}) {
 | |
|         if ($BY_FILE) {
 | |
|             $line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
 | |
|         } else {
 | |
|             $line = $lang_or_file . "${DELIM} ";
 | |
|         }
 | |
|         if ($opt_by_percent) {
 | |
|           foreach my $item (qw(nFiles)) {
 | |
|               next if $BY_FILE and $item eq 'nFiles';
 | |
|               foreach my $symbol (qw(same modified added removed)) {
 | |
|                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | |
|                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | |
|                   } else {
 | |
|                       $line .= "0${DELIM} ";
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|           foreach my $item (qw(blank comment)) {
 | |
|               foreach my $symbol (qw(same modified added removed)) {
 | |
|                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
 | |
|                       defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
 | |
|                       $rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
 | |
|                       $line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
 | |
|                   } else {
 | |
|                       $line .= "0.00${DELIM} ";
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|           foreach my $item (qw(code)) {
 | |
|               foreach my $symbol (qw(same modified added removed)) {
 | |
|                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | |
|                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | |
|                   } else {
 | |
|                       $line .= "0${DELIM} ";
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|         } else {
 | |
|           foreach my $item (qw(nFiles blank comment code)) {
 | |
|               next if $BY_FILE and $item eq 'nFiles';
 | |
|               foreach my $symbol (qw(same modified added removed)) {
 | |
|                   if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
 | |
|                       $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
 | |
|                   } else {
 | |
|                       $line .= "0${DELIM} ";
 | |
|                   }
 | |
|               }
 | |
|           }
 | |
|         }
 | |
|         push @results, $line;
 | |
|     }
 | |
| 
 | |
|     print "<- diff_csv_report\n" if $opt_v > 2;
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub rm_leading_tempdir {                     # {{{1
 | |
|     my ($in_file, $rh_temp_dirs, ) = @_;
 | |
|     my $clean_filename = $in_file;
 | |
|     foreach my $temp_d (keys %{$rh_temp_dirs}) {
 | |
|         if ($ON_WINDOWS) {
 | |
|         # \ -> / necessary to allow the next if test's
 | |
|         # m{} to work in the presence of spaces in file names
 | |
|             $temp_d         =~ s{\\}{/}g;
 | |
|             $clean_filename =~ s{\\}{/}g;
 | |
|         }
 | |
|         if ($clean_filename =~ m{^$temp_d/}) {
 | |
|             $clean_filename =~ s{^$temp_d/}{};
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
|     if ($ON_WINDOWS and $opt_by_file) { # then go back from / to \
 | |
|         if ($opt_json) {
 | |
|             $clean_filename =~ s{/}{\\\\}g;
 | |
|         } else {
 | |
|             $clean_filename =~ s{/}{\\}g;
 | |
|         }
 | |
|     }
 | |
|     return $clean_filename;
 | |
| } # 1}}}
 | |
| sub generate_sql    {                        # {{{1
 | |
|     my ($elapsed_sec, # in
 | |
|         $rhh_count  , # in  count{TYPE}{lang|code|blank|comment|scaled}
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
|     print "-> generate_sql\n" if $opt_v > 2;
 | |
| 
 | |
| #print "generate_sql A [$opt_sql_project]\n";
 | |
|     $opt_sql_project = cwd() unless defined $opt_sql_project;
 | |
|     $opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
 | |
| #print "generate_sql B [$opt_sql_project]\n";
 | |
|     $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
 | |
| #print "generate_sql C [$opt_sql_project]\n";
 | |
| 
 | |
|     my $schema = undef;
 | |
|     if ($opt_sql_style eq "oracle") {
 | |
|         $schema = "
 | |
| CREATE TABLE metadata
 | |
| (
 | |
|   id          INTEGER PRIMARY KEY,
 | |
|   timestamp   TIMESTAMP,
 | |
|   project     VARCHAR2(500 CHAR),
 | |
|   elapsed_s   NUMBER(10, 6)
 | |
| )
 | |
| /
 | |
| 
 | |
| CREATE TABLE t
 | |
| (
 | |
|   id             INTEGER           ,
 | |
|   project        VARCHAR2(500 CHAR),
 | |
|   language       VARCHAR2(500 CHAR),
 | |
|   file_fullname  VARCHAR2(500 CHAR),
 | |
|   file_dirname   VARCHAR2(500 CHAR),
 | |
|   file_basename  VARCHAR2(500 CHAR),
 | |
|   nblank         INTEGER,
 | |
|   ncomment       INTEGER,
 | |
|   ncode          INTEGER,
 | |
|   nscaled        NUMBER(10, 6),
 | |
| FOREIGN KEY (id)
 | |
|     REFERENCES metadata (id)
 | |
| )
 | |
| /
 | |
| 
 | |
| ";
 | |
|     } else {
 | |
|         $schema = "
 | |
| create table metadata (          -- $URL v $VERSION
 | |
|                 id        integer primary key,
 | |
|                 timestamp varchar(500),
 | |
|                 Project   varchar(500),
 | |
|                 elapsed_s real);
 | |
| create table t        (
 | |
|                 id            integer        ,
 | |
|                 Project       varchar(500)   ,
 | |
|                 Language      varchar(500)   ,
 | |
|                 File          varchar(500)   ,
 | |
|                 File_dirname  varchar(500)   ,
 | |
|                 File_basename varchar(500)   ,
 | |
|                 nBlank        integer        ,
 | |
|                 nComment      integer        ,
 | |
|                 nCode         integer        ,
 | |
|                 nScaled       real           ,
 | |
|         foreign key (id)
 | |
|             references metadata (id));
 | |
| ";
 | |
|     }
 | |
|     $opt_sql = "-" if $opt_sql eq "1";
 | |
| 
 | |
|     my $open_mode = ">";
 | |
|        $open_mode = ">>" if $opt_sql_append;
 | |
| 
 | |
|     my $fh;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path and $opt_sql ne "-") {
 | |
|         # only use the Win32::LongPath wrapper here when needed,
 | |
|         # and only when not writing to STDOUT.
 | |
|         $fh = open_file($open_mode, $opt_sql, 1);
 | |
|         die "Unable to write to $opt_sql\n" if !defined $fh;
 | |
|     } else {
 | |
|         $fh = new IO::File; # $opt_sql, "w";
 | |
|         if (!$fh->open("${open_mode}${opt_sql}")) {
 | |
|             die "Unable to write to $opt_sql  $!\n";
 | |
|         }
 | |
|     }
 | |
|     print $fh $schema unless defined $opt_sql_append;
 | |
| 
 | |
|     my $id = int(time());
 | |
|     my $insert_into_t = "insert into t ";
 | |
|     if ($opt_sql_style eq "oracle") {
 | |
|         printf $fh "insert into metadata values(%d, TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
 | |
|                     $id,
 | |
|                     strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 | |
|                     $opt_sql_project, $elapsed_sec;
 | |
|     } elsif ($opt_sql_style eq "named_columns") {
 | |
|         print $fh "begin transaction;\n";
 | |
|         $insert_into_t .= "(id, Project, Language, File, File_dirname, File_basename, nBlank, nComment, nCode, nScaled )";
 | |
|     } else {
 | |
|         print $fh "begin transaction;\n";
 | |
|         printf $fh "insert into metadata values(%d, '%s', '%s', %f);\n",
 | |
|                     $id, strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
 | |
|                     $opt_sql_project, $elapsed_sec;
 | |
|     }
 | |
| 
 | |
|     my $nIns = 0;
 | |
|     foreach my $file (keys %{$rhh_count}) {
 | |
|         my $language = $rhh_count->{$file}{'lang'};
 | |
|         my $clean_filename = $file;
 | |
|         # If necessary (that is, if the input contained an
 | |
|         # archive file [.tar.gz, etc]), strip the temporary
 | |
|         # directory name which was used to expand the archive
 | |
|         # from the file name.
 | |
| 
 | |
|         $clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
 | |
|         $clean_filename =~ s/\'/''/g;  # double embedded single quotes
 | |
|                                        # to escape them
 | |
| 
 | |
|         printf $fh "$insert_into_t values(%d, '%s', '%s', '%s', '%s', '%s', " .
 | |
|                    "%d, %d, %d, %f);\n",
 | |
|                     $id                        ,
 | |
|                     $opt_sql_project           ,
 | |
|                     $language                  ,
 | |
|                     $clean_filename            ,
 | |
|                     dirname( $clean_filename)  ,
 | |
|                     basename($clean_filename)  ,
 | |
|                     $rhh_count->{$file}{'blank'},
 | |
|                     $rhh_count->{$file}{'comment'},
 | |
|                     $rhh_count->{$file}{'code'}   ,
 | |
|                     $rhh_count->{$file}{'code'}*$rh_scale->{$language};
 | |
| 
 | |
|         ++$nIns;
 | |
|         if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
 | |
|             print $fh "commit;\n";
 | |
|             print $fh "begin transaction;\n";
 | |
|         }
 | |
|     }
 | |
|     if ($opt_sql_style ne "oracle") {
 | |
|         print $fh "commit;\n";
 | |
|     }
 | |
| 
 | |
|     $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
 | |
|     print "<- generate_sql\n" if $opt_v > 2;
 | |
| 
 | |
|     # sample query:
 | |
|     #
 | |
|     #   select project, language,
 | |
|     #          sum(nCode)     as Code,
 | |
|     #          sum(nComment)  as Comments,
 | |
|     #          sum(nBlank)    as Blank,
 | |
|     #          sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
 | |
|     #          100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
 | |
|     #          from t group by Project, Language order by Project, Code desc;
 | |
|     #
 | |
| } # 1}}}
 | |
| sub output_header   {                        # {{{1
 | |
|     my ($header_line,
 | |
|         $hyphen_line,
 | |
|         $BY_FILE    ,)    = @_;
 | |
|     print "-> output_header\n" if $opt_v > 2;
 | |
|     my @R = ();
 | |
|     if      ($opt_xml) {
 | |
|         if (!$ALREADY_SHOWED_XML_SECTION) {
 | |
|             push @R, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
 | |
|             push @R, '<?xml-stylesheet type="text/xsl" href="' .
 | |
|                             $opt_xsl . '"?>' if $opt_xsl;
 | |
|             push @R, "<results>";
 | |
|             push @R, "<header>$header_line</header>";
 | |
|             $ALREADY_SHOWED_XML_SECTION = 1;
 | |
|         }
 | |
|         if ($BY_FILE) {
 | |
|             push @R, "<files>";
 | |
|         } else {
 | |
|             push @R, "<languages>";
 | |
|         }
 | |
|     } elsif ($opt_yaml) {
 | |
|         push @R, "---\n# $header_line";
 | |
|     } elsif ($opt_csv or $opt_md) {
 | |
|         # append the header to the end of the column headers
 | |
|         # to keep the output a bit cleaner from a spreadsheet
 | |
|         # perspective
 | |
|     } else {
 | |
|         if ($ALREADY_SHOWED_HEADER) {
 | |
|             push @R, "";
 | |
|         } else {
 | |
|             push @R, $header_line;
 | |
|             $ALREADY_SHOWED_HEADER = 1;
 | |
|         }
 | |
|         push @R, $hyphen_line;
 | |
|     }
 | |
|     print "<- output_header\n" if $opt_v > 2;
 | |
|     return @R;
 | |
| } # 1}}}
 | |
| sub generate_report {                        # {{{1
 | |
|     # returns an array of lines containing the results
 | |
|     my ($version    , # in
 | |
|         $elapsed_sec, # in
 | |
|         $report_type, # in  "by language" | "by report file" | "by file"
 | |
|         $rhh_count  , # in  count{TYPE}{nFiles|code|blank|comment|scaled}
 | |
|                       #       where TYPE = name of language, source file,
 | |
|                       #                    or report file
 | |
|         $rh_scale   , # in
 | |
|        ) = @_;
 | |
| 
 | |
|     my %orig_case = ();
 | |
|     if ($ON_WINDOWS and $report_type eq "by file") {
 | |
|         # restore the original upper/lowercase version of the file name
 | |
|         foreach my $lc_file (sort keys %{$rhh_count}) {
 | |
|             foreach my $cat (sort keys %{$rhh_count->{$lc_file}}) {
 | |
|                 $orig_case{ $upper_lower_map{$lc_file} }{$cat} =
 | |
|                            $rhh_count->{$lc_file}{$cat};
 | |
|             }
 | |
|         }
 | |
|         $rhh_count = \%orig_case;
 | |
|     }
 | |
|     print "-> generate_report\n" if $opt_v > 2;
 | |
|     my $DELIM = ",";
 | |
|        $DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
 | |
|        $DELIM = "|" if defined $opt_md;
 | |
| 
 | |
|     my @results       = ();
 | |
| 
 | |
|     my $languages     = ();
 | |
| 
 | |
|     my $sum_files     = 0;
 | |
|     my $sum_code      = 0;
 | |
|     my $sum_blank     = 0;
 | |
|     my $sum_comment   = 0;
 | |
|     my $max_len       = 0;
 | |
|     foreach my $language (keys %{$rhh_count}) {
 | |
|         $sum_files   += $rhh_count->{$language}{'nFiles'} ;
 | |
|         $sum_blank   += $rhh_count->{$language}{'blank'}  ;
 | |
|         $sum_comment += $rhh_count->{$language}{'comment'};
 | |
|         $sum_code    += $rhh_count->{$language}{'code'}   ;
 | |
|         $max_len      = length($language) if length($language) > $max_len;
 | |
|     }
 | |
|     my $column_1_offset = 0;
 | |
|        $column_1_offset = $max_len - 17 if $max_len > 17;
 | |
|     my $sum_lines = $sum_blank + $sum_comment + $sum_code;
 | |
|     $elapsed_sec = 0.5 unless $elapsed_sec;
 | |
| 
 | |
|     my $spacing_0 = 23;
 | |
|     my $spacing_1 = 13;
 | |
|     my $spacing_2 =  9;
 | |
|     my $spacing_3 = 17;
 | |
|     if (!$opt_3) {
 | |
|         $spacing_1 = 19;
 | |
|         $spacing_2 = 14;
 | |
|         $spacing_3 = 27;
 | |
|     }
 | |
|     $spacing_0 += $column_1_offset;
 | |
|     $spacing_1 += $column_1_offset;
 | |
|     $spacing_3 += $column_1_offset;
 | |
|     my %Format = (
 | |
|         '1' => { 'xml' => 'name="%s" ',
 | |
|                  'txt' => "\%-${spacing_0}s ",
 | |
|                },
 | |
|         '2' => { 'xml' => 'name="%s" ',
 | |
|                  'txt' => "\%-${spacing_3}s ",
 | |
|                },
 | |
|         '3' => { 'xml' => 'files_count="%d" ',
 | |
|                  'txt' => '%6d ',
 | |
|                },
 | |
|         '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
 | |
|                  'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
 | |
|                },
 | |
|         '5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
 | |
|                  'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
 | |
|                },
 | |
|         '6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
 | |
|                  'txt' => ' x %6.2f = %14.2f',
 | |
|                },
 | |
|     );
 | |
|     my $Style = "txt";
 | |
|        $Style = "xml" if $opt_xml ;
 | |
|        $Style = "xml" if $opt_yaml;  # not a typo; just set to anything but txt
 | |
|        $Style = "xml" if $opt_json;  # not a typo; just set to anything but txt
 | |
|        $Style = "xml" if $opt_csv ;  # not a typo; just set to anything but txt
 | |
| 
 | |
|     my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
 | |
|        $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
 | |
|             if (!$opt_sum_reports) and (!$opt_3) and (68 + $column_1_offset) > 79;
 | |
|     my $data_line  = "";
 | |
|     my $first_column;
 | |
|     my $BY_LANGUAGE = 0;
 | |
|     my $BY_FILE     = 0;
 | |
|     if      ($report_type eq "by language") {
 | |
|         $first_column = "Language";
 | |
|         $BY_LANGUAGE  = 1;
 | |
|     } elsif ($report_type eq "by file")     {
 | |
|         $first_column = "File";
 | |
|         $BY_FILE      = 1;
 | |
|     } elsif ($report_type eq "by report file")     {
 | |
|         $first_column = "File";
 | |
|     } else {
 | |
|         $first_column = "Report File";
 | |
|     }
 | |
| 
 | |
|     my $header_line  = sprintf "%s v %s", $URL, $version;
 | |
|        $header_line .= sprintf("  T=%.2f s (%.1f files/s, %.1f lines/s)",
 | |
|                         $elapsed_sec           ,
 | |
|                         $sum_files/$elapsed_sec,
 | |
|                         $sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
 | |
|     if ($opt_xml or $opt_yaml or $opt_json) {
 | |
|         if (!$ALREADY_SHOWED_HEADER) {
 | |
|             if ($opt_by_file_by_lang and $opt_json) {
 | |
|                 push @results, '{ "by_file" : ';
 | |
|             }
 | |
|             push @results, xml_yaml_or_json_header($URL, $version, $elapsed_sec,
 | |
|                                                    $sum_files, $sum_lines, $BY_FILE);
 | |
| #           $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 | |
|             # --sum-reports yields two xml or yaml files, one by
 | |
|             # language and one by report file, each of which needs a header
 | |
|         }
 | |
|         if ($opt_xml) {
 | |
|             if ($BY_FILE or ($report_type eq "by report file")) {
 | |
|                 push @results, "<files>";
 | |
|             } else {
 | |
|                 push @results, "<languages>";
 | |
|             }
 | |
|         }
 | |
|     } else {
 | |
|         $header_line =~ s/,// if $opt_csv;
 | |
|         push @results, output_header($header_line, $hyphen_line, $BY_FILE);
 | |
|     }
 | |
| 
 | |
|     if ($Style eq "txt") {
 | |
|         # column headers
 | |
|         if (!$opt_3 and $BY_FILE) {
 | |
|             my $spacing_n = $spacing_1 - 11;
 | |
|             $data_line  = sprintf "%-${spacing_n}s ", $first_column;
 | |
|         } else {
 | |
|             $data_line  = sprintf "%-${spacing_1}s ", $first_column;
 | |
|         }
 | |
|         if ($BY_FILE) {
 | |
|             $data_line .= sprintf "%${spacing_2}s "  , " "    ;
 | |
|         } else {
 | |
|             $data_line .= sprintf "%${spacing_2}s "  , "files";
 | |
|         }
 | |
|         my $PCT_symbol = "";
 | |
|            $PCT_symbol = " \%" if $opt_by_percent;
 | |
|         $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
 | |
|             "blank${PCT_symbol}"   ,
 | |
|             "comment${PCT_symbol}" ,
 | |
|             "code";
 | |
|         $data_line .= sprintf " %8s   %14s",
 | |
|             "scale"         ,
 | |
|             "3rd gen. equiv"
 | |
|               if $opt_3;
 | |
|         if ($opt_md) {
 | |
|             my @col_header  = ();
 | |
|             if ($data_line =~ m{\s%}) {
 | |
|                 $data_line =~ s{\s%}{_%}g;
 | |
|                 foreach my $w ( split(' ', $data_line) ) {
 | |
|                     $w =~ s{_%}{ %};
 | |
|                     push @col_header, $w;
 | |
|                 }
 | |
|             } else {
 | |
|                 push @col_header, split(' ', $data_line);
 | |
|             }
 | |
|             my @col_hyphens    = ( '-------:') x scalar(@col_header);
 | |
|                $col_hyphens[0] =   ':-------'; # first column left justified
 | |
|             push @results, join("|", @col_header );
 | |
|             push @results, join("|", @col_hyphens);
 | |
|         } else {
 | |
|             push @results, $data_line;
 | |
|             push @results, $hyphen_line;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ($opt_csv)  {
 | |
|         my $header2;
 | |
|         if ($BY_FILE) {
 | |
|             $header2 = "language${DELIM}filename";
 | |
|         } else {
 | |
|             $header2 = "files${DELIM}language";
 | |
|         }
 | |
|         $header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
 | |
|         $header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
 | |
|         $header2 .= ${DELIM} . '"' . $header_line . '"';
 | |
|         push @results, $header2;
 | |
|     }
 | |
| 
 | |
|     my $sum_scaled = 0;
 | |
|     foreach my $lang_or_file (sort {
 | |
|                                  $rhh_count->{$b}{'code'} <=>
 | |
|                                  $rhh_count->{$a}{'code'}
 | |
|                               or $a cmp $b
 | |
|                                         }
 | |
|                                    keys %{$rhh_count}) {
 | |
|         next if $lang_or_file eq "by report file";
 | |
|         my ($factor, $scaled);
 | |
|         if ($BY_LANGUAGE or $BY_FILE) {
 | |
|             $factor = 1;
 | |
|             if ($BY_LANGUAGE) {
 | |
|                 if (defined $rh_scale->{$lang_or_file}) {
 | |
|                     $factor = $rh_scale->{$lang_or_file};
 | |
|                 } else {
 | |
|                     warn "No scale factor for $lang_or_file; using 1.00";
 | |
|                 }
 | |
|             } else { # by individual code file
 | |
|                 if ($report_type ne "by report file") {
 | |
|                     next unless defined $rhh_count->{$lang_or_file}{'lang'};
 | |
|                     next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 | |
|                     $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
 | |
|                 }
 | |
|             }
 | |
|             $scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
 | |
|         } else {
 | |
|             if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
 | |
|                 $opt_3 = 0;
 | |
|                 # If we're summing together files previously generated
 | |
|                 # with --no3 then rhh_count->{$lang_or_file}{'scaled'}
 | |
|                 # this variable will be undefined.  That should only
 | |
|                 # happen when summing together by file however.
 | |
|             } elsif ($BY_LANGUAGE) {
 | |
|                 warn "Missing scaled language info for $lang_or_file\n";
 | |
|             }
 | |
|             if ($opt_3) {
 | |
|                 $scaled =         $rhh_count->{$lang_or_file}{'scaled'};
 | |
|                 $factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if ($BY_FILE) {
 | |
|             my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | |
|                $clean_filename = xml_metachars($clean_filename) if $opt_xml;
 | |
|             $data_line  = sprintf $Format{'1'}{$Style}, $clean_filename;
 | |
|         } else {
 | |
|             $data_line  = sprintf $Format{'2'}{$Style}, $lang_or_file;
 | |
|         }
 | |
|         $data_line .= sprintf $Format{3}{$Style}  ,
 | |
|                         $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
 | |
|         if ($opt_by_percent) {
 | |
|           my $DEN = compute_denominator($opt_by_percent       ,
 | |
|                         $rhh_count->{$lang_or_file}{'code'}   ,
 | |
|                         $rhh_count->{$lang_or_file}{'comment'},
 | |
|                         $rhh_count->{$lang_or_file}{'blank'}  );
 | |
|           $data_line .= sprintf $Format{5}{$Style}  ,
 | |
|               $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100,
 | |
|               $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
 | |
|               $rhh_count->{$lang_or_file}{'code'}   ;
 | |
|         } else {
 | |
|           $data_line .= sprintf $Format{4}{$Style}  ,
 | |
|               $rhh_count->{$lang_or_file}{'blank'}  ,
 | |
|               $rhh_count->{$lang_or_file}{'comment'},
 | |
|               $rhh_count->{$lang_or_file}{'code'}   ;
 | |
|         }
 | |
|         $data_line .= sprintf $Format{6}{$Style}  ,
 | |
|             $factor                               ,
 | |
|             $scaled if $opt_3;
 | |
|         $sum_scaled  += $scaled if $opt_3;
 | |
| 
 | |
|         if ($opt_xml) {
 | |
|             if (defined $rhh_count->{$lang_or_file}{'lang'}) {
 | |
|                 my $lang = $rhh_count->{$lang_or_file}{'lang'};
 | |
|                 if (!defined $languages->{$lang}) {
 | |
|                     $languages->{$lang} = $lang;
 | |
|                 }
 | |
|                 $data_line.=' language="' . $lang . '" ';
 | |
|             }
 | |
|             if ($BY_FILE or ($report_type eq "by report file")) {
 | |
|                 push @results, "  <file " . $data_line . "/>";
 | |
|             } else {
 | |
|                 push @results, "  <language " . $data_line . "/>";
 | |
|             }
 | |
|         } elsif ($opt_yaml or $opt_json) {
 | |
|             my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | |
|             if ($opt_yaml) {
 | |
|                 # YAML: force quoted language or filename in case these
 | |
|                 #       have embedded funny characters, issue #312
 | |
|                 push @results,"'" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "' :$open_B";
 | |
|             } else {
 | |
|                 push @results,"${Q}" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "${Q} :$open_B";
 | |
|             }
 | |
|             push @results,"  ${Q}nFiles${Q}: " . $rhh_count->{$lang_or_file}{'nFiles'} . $C
 | |
|                 unless $BY_FILE;
 | |
|             if ($opt_by_percent) {
 | |
|               my $DEN = compute_denominator($opt_by_percent       ,
 | |
|                             $rhh_count->{$lang_or_file}{'code'}   ,
 | |
|                             $rhh_count->{$lang_or_file}{'comment'},
 | |
|                             $rhh_count->{$lang_or_file}{'blank'}  );
 | |
|               push @results,"  ${Q}blank_pct${Q}: "   .
 | |
|                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . $C;
 | |
|               push @results,"  ${Q}comment_pct${Q}: " .
 | |
|                 sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . $C;
 | |
|               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}  . $C;
 | |
|             } else {
 | |
|               push @results,"  ${Q}blank${Q}: "   . $rhh_count->{$lang_or_file}{'blank'}   . $C;
 | |
|               push @results,"  ${Q}comment${Q}: " . $rhh_count->{$lang_or_file}{'comment'} . $C;
 | |
|               push @results,"  ${Q}code${Q}: "    . $rhh_count->{$lang_or_file}{'code'}    . $C;
 | |
|             }
 | |
|             push @results,"  ${Q}language${Q}: "  . $Q . $rhh_count->{$lang_or_file}{'lang'} . $Q . $C
 | |
|                 if $BY_FILE;
 | |
|             if ($opt_3) {
 | |
|                 push @results, "  ${Q}scaled${Q}: " . $scaled . $C;
 | |
|                 push @results, "  ${Q}factor${Q}: " . $factor . $C;
 | |
|             }
 | |
|             if ($opt_json) { # replace the trailing comma with }, on the last line
 | |
|                 $results[-1] =~ s/,\s*$/},/;
 | |
|             }
 | |
|         } elsif ($opt_csv or $opt_md) {
 | |
|             my $extra_3 = "";
 | |
|                $extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
 | |
|             my $first_column = undef;
 | |
|             my $clean_name   = $lang_or_file;
 | |
|             my $str;
 | |
|             if ($opt_csv) {
 | |
|                 if ($BY_FILE) {
 | |
|                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 | |
|                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | |
|                 } else {
 | |
|                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 | |
|                 }
 | |
|                 $str = $first_column   . ${DELIM} .
 | |
|                        $clean_name     . ${DELIM};
 | |
|             } else {
 | |
|                 if ($BY_FILE) {
 | |
|                     $first_column = $rhh_count->{$lang_or_file}{'lang'};
 | |
|                     $clean_name   = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
 | |
|                     $str = $clean_name . ${DELIM};
 | |
|                 } else {
 | |
|                     $first_column = $rhh_count->{$lang_or_file}{'nFiles'};
 | |
|                     $str = $clean_name     . ${DELIM} .
 | |
|                            $first_column   . ${DELIM};
 | |
|                 }
 | |
|             }
 | |
|             if ($opt_by_percent) {
 | |
|               my $DEN = compute_denominator($opt_by_percent               ,
 | |
|                             $rhh_count->{$lang_or_file}{'code'}   ,
 | |
|                             $rhh_count->{$lang_or_file}{'comment'},
 | |
|                             $rhh_count->{$lang_or_file}{'blank'}  );
 | |
|               $str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'}   / $DEN * 100) . ${DELIM} .
 | |
|                       sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
 | |
|                       $rhh_count->{$lang_or_file}{'code'};
 | |
|             } else {
 | |
|               $str .= $rhh_count->{$lang_or_file}{'blank'}  . ${DELIM} .
 | |
|                       $rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
 | |
|                       $rhh_count->{$lang_or_file}{'code'};
 | |
|             }
 | |
|             $str .= $extra_3;
 | |
|             push @results, $str;
 | |
| 
 | |
|         } else {
 | |
|             push @results, $data_line;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $avg_scale = 1;  # weighted average of scale factors
 | |
|        $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
 | |
|             if $sum_code and $opt_3;
 | |
| 
 | |
|     if ($opt_xml) {
 | |
|         $data_line = "";
 | |
|         if (!$BY_FILE) {
 | |
|             $data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
 | |
|         }
 | |
|         if ($opt_by_percent) {
 | |
|           my $DEN = compute_denominator($opt_by_percent    ,
 | |
|                         $sum_code, $sum_comment, $sum_blank);
 | |
|           $data_line .= sprintf $Format{'5'}{$Style},
 | |
|               $sum_blank   / $DEN * 100,
 | |
|               $sum_comment / $DEN * 100,
 | |
|               $sum_code    ;
 | |
|         } else {
 | |
|           $data_line .= sprintf $Format{'4'}{$Style},
 | |
|               $sum_blank   ,
 | |
|               $sum_comment ,
 | |
|               $sum_code    ;
 | |
|         }
 | |
|         $data_line .= sprintf $Format{'6'}{$Style},
 | |
|             $avg_scale   ,
 | |
|             $sum_scaled  if $opt_3;
 | |
|         push @results, "  <total " . $data_line . "/>";
 | |
| 
 | |
|         if ($BY_FILE or ($report_type eq "by report file")) {
 | |
|             push @results, "</files>";
 | |
|         } else {
 | |
|             foreach my $language (keys %{$languages}) {
 | |
|                 push @results, '  <language name="' . $language . '"/>';
 | |
|             }
 | |
|             push @results, "</languages>";
 | |
|         }
 | |
| 
 | |
|         if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
 | |
|             push @results, "</results>";
 | |
|         } else {
 | |
|             $ALREADY_SHOWED_XML_SECTION = 1;
 | |
|         }
 | |
|     } elsif ($opt_yaml or $opt_json) {
 | |
|         my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
 | |
|         push @results, "${Q}SUM${Q}: ${open_B}";
 | |
|         if ($opt_by_percent) {
 | |
|           my $DEN = compute_denominator($opt_by_percent    ,
 | |
|                         $sum_code, $sum_comment, $sum_blank);
 | |
|           push @results, "  ${Q}blank${Q}: "  . sprintf("%.2f", $sum_blank   / $DEN * 100) . $C;
 | |
|           push @results, "  ${Q}comment${Q}: ". sprintf("%.2f", $sum_comment / $DEN * 100) . $C;
 | |
|           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 | |
|         } else {
 | |
|           push @results, "  ${Q}blank${Q}: "  . $sum_blank   . $C;
 | |
|           push @results, "  ${Q}comment${Q}: ". $sum_comment . $C;
 | |
|           push @results, "  ${Q}code${Q}: "   . $sum_code    . $C;
 | |
|         }
 | |
|         push @results, "  ${Q}nFiles${Q}: " . $sum_files   . $C;
 | |
|         if ($opt_3) {
 | |
|             push @results, "  ${Q}scaled${Q}: " . $sum_scaled . $C;
 | |
|             push @results, "  ${Q}factor${Q}: " . $avg_scale  . $C;
 | |
|         }
 | |
|         if ($opt_json) {
 | |
|             $results[-1] =~ s/,\s*$/} }/;
 | |
|             if ($opt_by_file_by_lang) {
 | |
|                 if ($ALREADY_SHOWED_HEADER) {
 | |
|                     $results[-1] .= ' }';
 | |
|                 } else {
 | |
|                     $results[-1] .= ', "by_lang" : {';
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     } elsif ($opt_csv) {
 | |
|         my @entries = ();
 | |
|         if ($opt_by_file) {
 | |
|             push @entries, "SUM";
 | |
|             push @entries, "";
 | |
|         } else {
 | |
|             push @entries, $sum_files;
 | |
|             push @entries, "SUM";
 | |
|         }
 | |
|         if ($opt_by_percent) {
 | |
|             my $DEN = compute_denominator($opt_by_percent    ,
 | |
|                           $sum_code, $sum_comment, $sum_blank);
 | |
|             push @entries, sprintf("%.2f", $sum_blank   / $DEN * 100);
 | |
|             push @entries, sprintf("%.2f", $sum_comment / $DEN * 100);
 | |
|         } else {
 | |
|             push @entries, $sum_blank;
 | |
|             push @entries, $sum_comment;
 | |
|         }
 | |
|         push @entries, $sum_code;
 | |
|         if ($opt_3) {
 | |
|             push @entries, $sum_scaled;
 | |
|             push @entries, $avg_scale ;
 | |
|         }
 | |
|         my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | |
|         push @results, join($separator, @entries);
 | |
|     } else {
 | |
| 
 | |
|         if ($BY_FILE) {
 | |
|             $data_line  = sprintf "%-${spacing_0}s ", "SUM:"  ;
 | |
|         } else {
 | |
|             $data_line  = sprintf "%-${spacing_1}s ", "SUM:"  ;
 | |
|             $data_line .= sprintf "%${spacing_2}d ", $sum_files;
 | |
|         }
 | |
|         if ($opt_by_percent) {
 | |
|           my $DEN = compute_denominator($opt_by_percent    ,
 | |
|                         $sum_code, $sum_comment, $sum_blank);
 | |
|           $data_line .= sprintf $Format{'5'}{$Style},
 | |
|               $sum_blank   / $DEN * 100,
 | |
|               $sum_comment / $DEN * 100,
 | |
|               $sum_code    ;
 | |
|         } else {
 | |
|           $data_line .= sprintf $Format{'4'}{$Style},
 | |
|               $sum_blank   ,
 | |
|               $sum_comment ,
 | |
|               $sum_code    ;
 | |
|         }
 | |
|         $data_line .= sprintf $Format{'6'}{$Style},
 | |
|             $avg_scale   ,
 | |
|             $sum_scaled if $opt_3;
 | |
|         if ($opt_md) {
 | |
|             my @words = split(' ', $data_line);
 | |
|             my $n_cols = scalar(@words);
 | |
| #           my $n_cols = scalar(split(' ', $data_line));  # deprecated
 | |
|             $data_line =~ s/\s+/\|/g;
 | |
|             my @col_hyphens    = ( '--------') x $n_cols;
 | |
|             push @results, join("|", @col_hyphens);
 | |
|             push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
 | |
|             unshift @results, ( "cloc|$header_line", "--- | ---", "", );
 | |
|         } else {
 | |
|             push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
 | |
|             push @results, $data_line   if $sum_files > 1 or $opt_sum_one;
 | |
|             push @results, $hyphen_line;
 | |
|         }
 | |
|     }
 | |
|     write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | |
|     $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
 | |
|     print "<- generate_report\n" if $opt_v > 2;
 | |
|     return @results;
 | |
| } # 1}}}
 | |
| sub print_errors {                           # {{{1
 | |
|     my ($rh_Error_Codes, # in
 | |
|         $raa_errors    , # in
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> print_errors\n" if $opt_v > 2;
 | |
|     my %error_string = reverse(%{$rh_Error_Codes});
 | |
|     my $nErrors      = scalar @{$raa_errors};
 | |
|     warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors);
 | |
|     for (my $i = 0; $i < $nErrors; $i++) {
 | |
|         warn sprintf "%s:  %s\n",
 | |
|                      $error_string{ $raa_errors->[$i][0] },
 | |
|                      $raa_errors->[$i][1] ;
 | |
|     }
 | |
|     print "<- print_errors\n" if $opt_v > 2;
 | |
| 
 | |
| } # 1}}}
 | |
| sub write_lang_def {                         # {{{1
 | |
|     my ($file                     ,
 | |
|         $rh_Language_by_Extension , # in
 | |
|         $rh_Language_by_Script    , # in
 | |
|         $rh_Language_by_File      , # in
 | |
|         $rhaa_Filters_by_Language , # in
 | |
|         $rh_Not_Code_Extension    , # in
 | |
|         $rh_Not_Code_Filename     , # in
 | |
|         $rh_Scale_Factor          , # in
 | |
|         $rh_EOL_Continuation_re   , # in
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> write_lang_def($file)\n" if $opt_v > 2;
 | |
|     my @outlines = ();
 | |
| 
 | |
|     foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
 | |
|         next if $language =~ /(Brain|\(unknown\))/;
 | |
|         next if defined $Extension_Collision{$language};
 | |
|         push @outlines, $language;
 | |
|         foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
 | |
|             my $line = "";
 | |
|             $line .= sprintf "    filter %s", $filter->[0];
 | |
|             $line .= sprintf " %s", $filter->[1] if defined $filter->[1];
 | |
|             # $filter->[0] == 'remove_between_general',
 | |
|             #                 'remove_between_regex', and
 | |
|             #                 'remove_matches_2re' have two args
 | |
|             $line .= sprintf " %s", $filter->[2] if defined $filter->[2];
 | |
|             # $filter->[0] == 'replace_between_regex' has three or four args
 | |
|             $line .= sprintf " %s", $filter->[3] if defined $filter->[3];
 | |
|             $line .= sprintf " %s", $filter->[4] if defined $filter->[4];
 | |
|             push @outlines, $line;
 | |
|         }
 | |
| 
 | |
|         # file extension won't appear if the extension maps to
 | |
|         # multiple languages; work around this
 | |
|         my $found = 0;
 | |
|         foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
 | |
|             if ($language eq $rh_Language_by_Extension->{$ext}) {
 | |
|                 push @outlines, sprintf "    extension %s\n", $ext;
 | |
|                 $found = 1;
 | |
|             }
 | |
|         }
 | |
|         if (!$found and $opt_write_lang_def_incl_dup) {
 | |
|             foreach my $multilang (sort keys %Extension_Collision) {
 | |
|                 my %Languages = map { $_ => 1 } split('/', $multilang);
 | |
|                 next unless $Languages{$language};
 | |
|                 foreach my $ext (@{$Extension_Collision{$multilang}}) {
 | |
|                     push @outlines, sprintf "    extension %s\n", $ext;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         foreach my $filename (sort keys %{$rh_Language_by_File}) {
 | |
|             if ($language eq $rh_Language_by_File->{$filename}) {
 | |
|                 push @outlines, sprintf "    filename %s\n", $filename;
 | |
|             }
 | |
|         }
 | |
|         foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
 | |
|             if ($language eq $rh_Language_by_Script->{$script_exe}) {
 | |
|                 push @outlines, sprintf "    script_exe %s\n", $script_exe;
 | |
|             }
 | |
|         }
 | |
|         push @outlines, sprintf "    3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
 | |
|         if (defined $rh_EOL_Continuation_re->{$language}) {
 | |
|             push @outlines, sprintf "    end_of_line_continuation %s\n",
 | |
|                 $rh_EOL_Continuation_re->{$language};
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     write_file($file, {}, @outlines);
 | |
|     print "<- write_lang_def\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub read_lang_def {                          # {{{1
 | |
|     my ($file                     ,
 | |
|         $rh_Language_by_Extension , # out
 | |
|         $rh_Language_by_Script    , # out
 | |
|         $rh_Language_by_File      , # out
 | |
|         $rhaa_Filters_by_Language , # out
 | |
|         $rh_Not_Code_Extension    , # out
 | |
|         $rh_Not_Code_Filename     , # out
 | |
|         $rh_Scale_Factor          , # out
 | |
|         $rh_EOL_Continuation_re   , # out
 | |
|         $rh_EOL_abc,
 | |
|        ) = @_;
 | |
| 
 | |
| 
 | |
|     print "-> read_lang_def($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $language = "";
 | |
|     my @lines = read_file($file);
 | |
|     foreach (@lines) {
 | |
|         next if /^\s*#/ or /^\s*$/;
 | |
| 
 | |
|         $_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
 | |
| 
 | |
|         if (/^(\w+.*?)\s*$/) {
 | |
|             $language = $1;
 | |
|             next;
 | |
|         }
 | |
|         die "Missing computer language name, line $. of $file\n"
 | |
|             unless $language;
 | |
| 
 | |
|         if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
 | |
|                        \s+(\S+)\s+(\S+)\s*$/x) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $3 , $4 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_between_regex)
 | |
|                    \s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 , $4 , $5]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_between_regex)
 | |
|                        \s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 , $4 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_regex)\s+(\S+)\s*$/) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , '' ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_regex)
 | |
|                        \s+(\S+)\s+(.+?)\s*$/x) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
 | |
| 
 | |
|         } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
 | |
|             if (defined $rh_Language_by_Extension->{$1}) {
 | |
|                 die "File extension collision:  $1 ",
 | |
|                     "maps to languages '$rh_Language_by_Extension->{$1}' ",
 | |
|                     "and '$language'\n" ,
 | |
|                     "Edit $file and remove $1 from one of these two ",
 | |
|                     "language definitions.\n";
 | |
|             }
 | |
|             $rh_Language_by_Extension->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
 | |
|             $rh_Language_by_File->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
 | |
|             $rh_Language_by_Script->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
 | |
|             $rh_Scale_Factor->{$language} = $1;
 | |
| 
 | |
|         } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
 | |
|             $rh_EOL_Continuation_re->{$language} = $1;
 | |
| 
 | |
|         } else {
 | |
|             die "Unexpected data line $. of $file:\n$_\n";
 | |
|         }
 | |
| 
 | |
|     }
 | |
|     print "<- read_lang_def\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub merge_lang_def {                         # {{{1
 | |
|     my ($file                     ,
 | |
|         $rh_Language_by_Extension , # in/out
 | |
|         $rh_Language_by_Script    , # in/out
 | |
|         $rh_Language_by_File      , # in/out
 | |
|         $rhaa_Filters_by_Language , # in/out
 | |
|         $rh_Not_Code_Extension    , # in/out
 | |
|         $rh_Not_Code_Filename     , # in/out
 | |
|         $rh_Scale_Factor          , # in/out
 | |
|         $rh_EOL_Continuation_re   , # in/out
 | |
|         $rh_EOL_abc,
 | |
|        ) = @_;
 | |
| 
 | |
| 
 | |
|     print "-> merge_lang_def($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $language        = "";
 | |
|     my $already_know_it = undef;
 | |
|     my @lines = read_file($file);
 | |
|     foreach (@lines) {
 | |
|         next if /^\s*#/ or /^\s*$/;
 | |
| 
 | |
|         $_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
 | |
| 
 | |
|         if (/^(\w+.*?)\s*$/) {
 | |
|             $language = $1;
 | |
|             $already_know_it = defined $rh_Scale_Factor->{$language};
 | |
|             next;
 | |
|         }
 | |
|         die "Missing computer language name, line $. of $file\n"
 | |
|             unless $language;
 | |
| 
 | |
|         if      (/^\s{4}filter\s+(remove_between_(general|2re|regex))
 | |
|                        \s+(\S+)\s+(\S+)\s*$/x) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $3 , $4 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_between_regex)
 | |
|                    \s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 , $4 , $5]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_between_regex)
 | |
|                        \s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 , $4 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(replace_regex)
 | |
|                        \s+(\S+)\s+(.+?)\s*$/x) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [
 | |
|                   $1 , $2 , $3 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(\w+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
 | |
| 
 | |
|         } elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
 | |
| 
 | |
|         } elsif (/^\s{4}extension\s+(\S+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             if (defined $rh_Language_by_Extension->{$1}) {
 | |
|                 die "File extension collision:  $1 ",
 | |
|                     "maps to languages '$rh_Language_by_Extension->{$1}' ",
 | |
|                     "and '$language'\n" ,
 | |
|                     "Edit $file and remove $1 from one of these two ",
 | |
|                     "language definitions.\n";
 | |
|             }
 | |
|             $rh_Language_by_Extension->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}filename\s+(\S+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             $rh_Language_by_File->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             $rh_Language_by_Script->{$1} = $language;
 | |
| 
 | |
|         } elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             $rh_Scale_Factor->{$language} = $1;
 | |
| 
 | |
|         } elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
 | |
|             next if $already_know_it;
 | |
|             $rh_EOL_Continuation_re->{$language} = $1;
 | |
| 
 | |
|         } else {
 | |
|             die "Unexpected data line $. of $file:\n$_\n";
 | |
|         }
 | |
| 
 | |
|     }
 | |
|     print "<- merge_lang_def\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub print_extension_info {                   # {{{1
 | |
|     my ($extension,) = @_;
 | |
|     if ($extension) {  # show information on this extension
 | |
|         foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | |
|             # Language_by_Extension{f}    = 'Fortran 77'
 | |
|             next if $Language_by_Extension{$ext} =~ /Brain/;
 | |
|             printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
 | |
|                 if $ext =~ m{$extension}i;
 | |
|         }
 | |
|     } else {           # show information on all  extensions
 | |
|         foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | |
|             next if $Language_by_Extension{$ext} =~ /Brain/;
 | |
|             # Language_by_Extension{f}    = 'Fortran 77'
 | |
|             printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
 | |
|         }
 | |
|     }
 | |
| } # 1}}}
 | |
| sub print_language_info {                    # {{{1
 | |
|     my ($language,
 | |
|         $prefix ,) = @_;
 | |
|     my %extensions = (); # the subset matched by the given $language value
 | |
|     if ($language) {  # show information on this language
 | |
|         foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
 | |
|             # Language_by_Extension{f}    = 'Fortran 77'
 | |
|             push @{$extensions{$Language_by_Extension{$ext}} }, $ext
 | |
|                 if lc $Language_by_Extension{$ext} eq lc $language;
 | |
| #               if $Language_by_Extension{$ext} =~ m{$language}i;
 | |
|         }
 | |
|     } else {          # show information on all  languages
 | |
|         foreach my $ext (sort {lc $a cmp lc $b  or $a cmp $b } keys %Language_by_Extension) {
 | |
|             # Language_by_Extension{f}    = 'Fortran 77'
 | |
|             push @{$extensions{$Language_by_Extension{$ext}} }, $ext
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # add exceptions (one file extension mapping to multiple languages)
 | |
|     if (!$language or $language =~ /^(Objective-C|MATLAB|Mathematica|MUMPS|Mercury)$/i) {
 | |
|         push @{$extensions{'Objective-C'}}, "m";
 | |
|         push @{$extensions{'MATLAB'}}     , "m";
 | |
|         push @{$extensions{'Mathematica'}}, "m";
 | |
|         push @{$extensions{'MUMPS'}}      , "m";
 | |
|         delete $extensions{'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Lisp|OpenCL)$/i) {
 | |
|         push @{$extensions{'Lisp'}}  , "cl";
 | |
|         push @{$extensions{'OpenCL'}}, "cl";
 | |
|         delete $extensions{'Lisp/OpenCL'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Lisp|Julia)$/i) {
 | |
|         push @{$extensions{'Lisp'}}  , "jl";
 | |
|         push @{$extensions{'Julia'}} , "jl";
 | |
|         delete $extensions{'Lisp/Julia'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Perl|Prolog)$/i) {
 | |
|         push @{$extensions{'Perl'}}  , "pl";
 | |
|         push @{$extensions{'Prolog'}}, "pl";
 | |
|         delete $extensions{'Perl/Prolog'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Raku|Prolog)$/i) {
 | |
|         push @{$extensions{'Perl'}}  , "p6";
 | |
|         push @{$extensions{'Prolog'}}, "p6";
 | |
|         delete $extensions{'Perl/Prolog'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(IDL|Qt Project|Prolog|ProGuard)$/i) {
 | |
|         push @{$extensions{'IDL'}}       , "pro";
 | |
|         push @{$extensions{'Qt Project'}}, "pro";
 | |
|         push @{$extensions{'Prolog'}}    , "pro";
 | |
|         push @{$extensions{'ProGuard'}}  , "pro";
 | |
|         delete $extensions{'IDL/Qt Project/Prolog/ProGuard'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(D|dtrace)$/i) {
 | |
|         push @{$extensions{'D'}}       , "d";
 | |
|         push @{$extensions{'dtrace'}}  , "d";
 | |
|         delete $extensions{'D/dtrace'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^Forth$/) {
 | |
|         push @{$extensions{'Forth'}}     , "fs";
 | |
|         push @{$extensions{'Forth'}}     , "f";
 | |
|         push @{$extensions{'Forth'}}     , "for";
 | |
|         delete $extensions{'Fortran 77/Forth'};
 | |
|         delete $extensions{'F#/Forth'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^Fortran 77$/) {
 | |
|         push @{$extensions{'Fortran 77'}}, "f";
 | |
|         push @{$extensions{'Fortran 77'}}, "for";
 | |
|         push @{$extensions{'F#'}}        , "fs";
 | |
|         delete $extensions{'Fortran 77/Forth'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^F#$/) {
 | |
|         push @{$extensions{'F#'}}        , "fs";
 | |
|         delete $extensions{'F#/Forth'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Verilog-SystemVerilog|Coq)$/) {
 | |
|         push @{$extensions{'Coq'}}                   , "v";
 | |
|         push @{$extensions{'Verilog-SystemVerilog'}} , "v";
 | |
|         delete $extensions{'Verilog-SystemVerilog/Coq'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(TypeScript|Qt Linguist)$/) {
 | |
|         push @{$extensions{'TypeScript'}}  , "ts";
 | |
|         push @{$extensions{'Qt Linguist'}} , "ts";
 | |
|         delete $extensions{'TypeScript/Qt Linguist'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Qt|Glade)$/) {
 | |
|         push @{$extensions{'Glade'}} , "ui";
 | |
|         push @{$extensions{'Qt'}}    , "ui";
 | |
|         delete $extensions{'Qt/Glade'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(C#|Smalltalk)$/) {
 | |
|         push @{$extensions{'C#'}}           , "cs";
 | |
|         push @{$extensions{'Smalltalk'}}    , "cs";
 | |
|         delete $extensions{'C#/Smalltalk'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Visual\s+Basic|TeX|Apex\s+Class)$/i) {
 | |
|         push @{$extensions{'Visual Basic'}} , "cls";
 | |
|         push @{$extensions{'TeX'}}          , "cls";
 | |
|         push @{$extensions{'Apex Class'}}   , "cls";
 | |
|         delete $extensions{'Visual Basic/TeX/Apex Class'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Ant)$/i) {
 | |
|         push @{$extensions{'Ant'}}  , "build.xml";
 | |
|         delete $extensions{'Ant/XML'};
 | |
|     }
 | |
|     if (!$language or $language =~ /^(Scheme|SaltStack)$/i) {
 | |
|         push @{$extensions{'Scheme'}}    , "sls";
 | |
|         push @{$extensions{'SaltStack'}} , "sls";
 | |
|         delete $extensions{'Scheme/SaltStack'};
 | |
|     }
 | |
|     if ($opt_explain) {
 | |
|         return unless $extensions{$language};
 | |
|         if ($prefix) {
 | |
|             printf "%s %s\n", $prefix, join(", ", @{$extensions{$language}});
 | |
|         } else {
 | |
|             printf "%-26s (%s)\n", $language, join(", ", @{$extensions{$language}});
 | |
|         }
 | |
|     } else {
 | |
|         if (%extensions) {
 | |
|             foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
 | |
|                 next if $lang =~ /Brain/;
 | |
|                 if ($prefix) {
 | |
|                     printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
 | |
|                 } else {
 | |
|                     printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| } # 1}}}
 | |
| sub print_language_filters {                 # {{{1
 | |
|     my ($language,) = @_;
 | |
|     if (!$Filters_by_Language{$language} or
 | |
|         !@{$Filters_by_Language{$language}}) {
 | |
|         warn "Unknown language: $language\n";
 | |
|         warn "Use --show-lang to list all defined languages.\n";
 | |
|         return;
 | |
|     }
 | |
|     printf "%s\n", $language;
 | |
|     foreach my $filter (@{$Filters_by_Language{$language}}) {
 | |
|         printf "    filter %s", $filter->[0];
 | |
|         printf "  %s", $filter->[1] if defined $filter->[1];
 | |
|         printf "  %s", $filter->[2] if defined $filter->[2];
 | |
|         printf "  %s", $filter->[3] if defined $filter->[3];
 | |
|         printf "  %s", $filter->[4] if defined $filter->[4];
 | |
|         print  "\n";
 | |
|     }
 | |
|     print_language_info($language, "    extensions:");
 | |
| } # 1}}}
 | |
| sub top_level_SMB_dir {                      # {{{1
 | |
|     # Ref https://github.com/AlDanial/cloc/issues/392, if the
 | |
|     # user supplies a directory name which is an SMB mount
 | |
|     # point, this directory will appear to File::Find as
 | |
|     # though it is empty unless $File::Find::dont_use_nlink
 | |
|     # is set to 1.  This subroutine checks to see if any SMB
 | |
|     # mounts (identified from stat()'s fourth entry, nlink,
 | |
|     # having a value of 2) were passed in on the command line.
 | |
| 
 | |
|     my ($ra_arg_list,) = @_;  # in user supplied file name, directory name, git hash, etc
 | |
|     foreach my $entry (@{$ra_arg_list}) {
 | |
|         next unless is_dir($entry);
 | |
|         # gets here if $entry is a directory; now get its nlink value
 | |
|         my $nlink;
 | |
|         if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|             my $stats = statL($entry);
 | |
|             $nlink = $stats->{nlink} if defined $stats;
 | |
|         } else {
 | |
|             my @stats = stat($entry);
 | |
|             $nlink = $stats[3];
 | |
|         }
 | |
|         return 1 if $nlink == 2;  # meaning it is an SMB mount
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| # 1}}}
 | |
| sub get_git_metadata {                       # {{{1
 | |
|     my ($ra_arg_list,             # in  file name, directory name and/or
 | |
|                                   #     git commit hash to examine
 | |
|         $rh_git_metadata) = @_;   # out repo info
 | |
|     # Capture git information where possible--origin, branch, commit hash.
 | |
|     my $prt_args = join(",", @{$ra_arg_list});
 | |
|     print "-> get_git_metadata($prt_args)\n" if $opt_v > 2;
 | |
|     foreach my $arg (@{$ra_arg_list}) {
 | |
|         next if is_file($arg);
 | |
|         my $origin = `git remote get-url origin 2>&1`;
 | |
|         next if $origin =~ /^fatal:/;
 | |
|         chomp($rh_git_metadata->{$arg}{"origin"} = $origin);
 | |
|         chomp($rh_git_metadata->{$arg}{"branch"} = `git symbolic-ref --short HEAD`);
 | |
|         if (is_dir($arg)) {
 | |
|             chomp($rh_git_metadata->{$arg}{"commit"}   = `git rev-parse HEAD`);
 | |
|         } else {
 | |
|             chomp($rh_git_metadata->{$arg}{"commit"}   = `git rev-parse $arg`);
 | |
|         }
 | |
|     }
 | |
|     print "<- get_git_metadata()\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub replace_git_hash_with_tarfile {          # {{{1
 | |
|     my ($ra_arg_list,             # in  file name, directory name and/or
 | |
|                                   #     git commit hash to examine
 | |
|         $ra_git_similarity) = @_; # out only if --opt-git-diff-simindex
 | |
|     # replace git hashes in $ra_arg_list with tar files
 | |
|     # Diff mode and count mode behave differently:
 | |
|     #   Diff:
 | |
|     #       file  git_hash
 | |
|     #          Extract file from the git repo and only compare to it.
 | |
|     #       git_hash1  git_hash2
 | |
|     #          Get listings of all files in git_hash1 and git_hash2.
 | |
|     #            git ls-tree --name-only -r *git_hash1*
 | |
|     #            git ls-tree --name-only -r *git_hash2*
 | |
|     #          Next, get listings of files that changed with git_hash1
 | |
|     #          and git_hash2.
 | |
|     #            git diff-tree -r --no-commit-id --name-only *git_hash1* *git_hash2*
 | |
|     #          Finally, make two tar files of git repos1 and 2 where the file
 | |
|     #          listing is the union of changes.
 | |
|     #            git archive -o tarfile1 *git_hash1* \
 | |
|     #               <union of files that changed and exist in this commit>
 | |
|     #            git archive -o tarfile2 *git_hash2* \
 | |
|     #               <union of files that changed and exist in this commit>
 | |
|     #          To avoid "Argument list too long" error, repeat the git
 | |
|     #          archive step with chunks of 30,000 files at a time then
 | |
|     #          merge the tar files as the final step.
 | |
|     #   Regular count:
 | |
|     #       Simply make a tar file of all files in the git repo.
 | |
| 
 | |
|     my $prt_args = join(",", @{$ra_arg_list});
 | |
|     print "-> replace_git_hash_with_tarfile($prt_args)\n" if $opt_v > 2;
 | |
| #print "ra_arg_list 1: @{$ra_arg_list}\n";
 | |
| 
 | |
|     my $hash_regex = qr/^([a-f\d]{5,40}|master|HEAD)(~\d+)?$/;
 | |
|     my %replacement_arg_list = ();
 | |
| 
 | |
|     # early exit if none of the inputs look like git hashes
 | |
|     my %git_hash = ();
 | |
|     my $i = 0;
 | |
|     foreach my $file_or_dir (@{$ra_arg_list}) {
 | |
|         ++$i;
 | |
|         if (can_read($file_or_dir)) { # readable file or dir; not a git hash
 | |
|             $replacement_arg_list{$i} = $file_or_dir;
 | |
|             next;
 | |
|         } elsif ($opt_force_git or $file_or_dir =~ m/$hash_regex/) {
 | |
|             $git_hash{$file_or_dir} = $i;
 | |
|         } # else the input can't be understood; ignore for now
 | |
|     }
 | |
|     return unless %git_hash;
 | |
| 
 | |
| #   my $have_tar_git = external_utility_exists($ON_WINDOWS ? "unzip" : "tar --version") &&
 | |
|     my $have_tar_git = external_utility_exists("tar --version") &&
 | |
|                        external_utility_exists("git --version");
 | |
|     if (!$have_tar_git) {
 | |
|         warn "One or more inputs looks like a git hash but " .
 | |
|              "either git or tar is unavailable.\n";
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     my %repo_listing = ();  # $repo_listing{hash}{files} = 1;
 | |
|     foreach my $hash (sort keys %git_hash) {
 | |
|         my $git_list_cmd = "git ls-tree --name-only -r ";
 | |
|         if ($hash =~ m/(.*?):(.*?)$/) {
 | |
|             # requesting specific file(s) from this hash; grep for them
 | |
|             # Note:  this capability not fully implemented yet
 | |
|             $git_list_cmd .= "$1|grep '$2'";
 | |
|         } else {
 | |
|             $git_list_cmd .= $hash;
 | |
|         }
 | |
|         print "$git_list_cmd\n" if $opt_v;
 | |
|         foreach my $file (`$git_list_cmd`) {
 | |
|             $file =~ s/\s+$//;
 | |
|             $repo_listing{$hash}{$file} = 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # logic for each of the modes
 | |
|     if ($opt_diff) {
 | |
| #print "A DIFF\n";
 | |
|         # set the default git diff algorithm
 | |
|         $opt_git_diff_rel = 1 unless $opt_git_diff_all or
 | |
|                                      $opt_git_diff_simindex;
 | |
|         # is it git to git, or git to file/dir ?
 | |
|         my ($Left, $Right) = @{$ra_arg_list};
 | |
| 
 | |
| #use Data::Dumper;
 | |
| #print "diff_listing= "; print Dumper(\%diff_listing);
 | |
| #print "git_hash= "; print Dumper(\%git_hash);
 | |
|         if ($git_hash{$Left} and $git_hash{$Right}) {
 | |
| #print "A DIFF git-to-git\n";
 | |
|             # git to git
 | |
|             # first make a union of all files that have changed in both commits
 | |
|             my %files_union = ();
 | |
| 
 | |
|             my @left_files  = ();
 | |
|             my @right_files = ();
 | |
|             if ($opt_git_diff_rel) {
 | |
|                 # Strategy 1:  Union files are what git consinders have changed
 | |
|                 #              between the two commits.
 | |
|                 my $git_list_cmd = "git diff-tree -r --no-commit-id --name-only $Left $Right";
 | |
|                 # print "$git_list_cmd\n" if $opt_v;
 | |
|                 foreach my $file (`$git_list_cmd`) {
 | |
|                     chomp($file);
 | |
|                     $files_union{$file} = 1;
 | |
|                 }
 | |
|             } elsif ($opt_git_diff_all) {
 | |
|                 # Strategy 2:  Union files all files in both repos.
 | |
|                 foreach my $file (keys %{$repo_listing{$Left }},
 | |
|                                   keys %{$repo_listing{$Right}}) {
 | |
|                    $files_union{$file} = 1;
 | |
|                 }
 | |
|             } elsif ($opt_git_diff_simindex) {
 | |
|                 # Strategy 3:  Use git's own similarity index to figure
 | |
|                 #              out which files to compare.
 | |
|                 git_similarity_index($Left              , # in
 | |
|                                      $Right             , # in
 | |
|                                     \@left_files        , # out
 | |
|                                     \@right_files       , # out
 | |
|                                      $ra_git_similarity); # out
 | |
| 
 | |
|             }
 | |
| 
 | |
|             if ($opt_exclude_list_file) {
 | |
|                 my @reject_list = read_list_file($opt_exclude_list_file);
 | |
|                 my %entries_to_drop = ();
 | |
|                 foreach my $f_or_d (@reject_list) {
 | |
|                     if (-d $f_or_d) {
 | |
|                         # directory match
 | |
|                         $entries_to_drop{$f_or_d} = 1;
 | |
|                         foreach my $file (keys %files_union) {
 | |
|                             $entries_to_drop{$file} = 1 if $file =~ m{$f_or_d/};
 | |
|                         }
 | |
|                     } else {
 | |
|                         # exact match
 | |
|                         $entries_to_drop{$f_or_d} = 1;
 | |
|                     }
 | |
|                 }
 | |
|                 foreach my $file (keys %entries_to_drop) {
 | |
|                     delete $files_union{$file};
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             if ($opt_list_file) {
 | |
|                 my @include_list = read_list_file($opt_list_file);
 | |
|                 my %entries_to_add  = ();
 | |
|                 foreach my $f_or_d (@include_list) {
 | |
|                     if (-d $f_or_d) {
 | |
|                         # directory match
 | |
|                         $entries_to_add{$f_or_d} = 1;
 | |
|                         foreach my $file (keys %files_union) {
 | |
|                             $entries_to_add{$file} = 1 if $file =~ m{$f_or_d/};
 | |
|                         }
 | |
|                     } else {
 | |
|                         # exact match
 | |
|                         $entries_to_add{$f_or_d} = 1;
 | |
|                     }
 | |
|                 }
 | |
|                 %files_union = %entries_to_add; # and exclude all others
 | |
|             }
 | |
| 
 | |
|             # then make truncated tar files of those union files which
 | |
|             # actually exist in each repo
 | |
|             foreach my $file (sort keys %files_union) {
 | |
|                 push @left_files , $file if $repo_listing{$Left }{$file};
 | |
|                 push @right_files, $file if $repo_listing{$Right}{$file};
 | |
|             }
 | |
|             # backslash whitespace, weird chars within file names (#257, #284)
 | |
| 
 | |
| #           my @Lfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_}   @left_files;
 | |
| #           my @Lfiles= @left_files;
 | |
|             if(scalar(@left_files) > 0) {
 | |
|                 $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left , \@left_files);
 | |
|             } else {
 | |
|                 # In the right side commit ONLY file(s) was added, so no file(s) will exist in the left side commit.
 | |
|                 # Create empty TAR to detect added lines of code.
 | |
|                 $replacement_arg_list{$git_hash{$Left}} = empty_tar();
 | |
|             }
 | |
| #           $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left , \@Lfiles);
 | |
| #           my @Rfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_}   @right_files ;
 | |
| #           my @Rfiles= @right_files ;
 | |
| #use Data::Dumper;
 | |
| #print Dumper('left' , \@left_files);
 | |
| #print Dumper('right', \@right_files);
 | |
| #die;
 | |
| 
 | |
|             if(scalar(@right_files) > 0) {
 | |
|                 $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@right_files);
 | |
|             } else {
 | |
|                  # In the left side commit ONLY file(s) was deleted, so file(s) will not exist in right side commit.
 | |
|                  # Create empty TAR to detect removed lines of code.
 | |
|                  $replacement_arg_list{$git_hash{$Right}} = empty_tar();
 | |
|             }
 | |
| #           $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@Rfiles);
 | |
| #write_file("/tmp/Lfiles.txt", {}, sort @Lfiles);
 | |
| #write_file("/tmp/Rfiles.txt", {}, sort @Rfiles);
 | |
| #write_file("/tmp/files_union.txt", {}, sort keys %files_union);
 | |
| 
 | |
|         } else {
 | |
| #print "A DIFF git-to-file or file-to-git Left=$Left Right=$Right\n";
 | |
|             # git to file/dir or file/dir to git
 | |
|             if      ($git_hash{$Left}  and $repo_listing{$Left}{$Right} ) {
 | |
| #print "A DIFF 1\n";
 | |
|                 # $Left is a git hash and $Right is a file
 | |
|                 $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left, $Right);
 | |
|             } elsif ($git_hash{$Right} and $repo_listing{$Right}{$Left}) {
 | |
| #print "A DIFF 2\n";
 | |
|                 # $Left is a file and $Right is a git hash
 | |
|                 $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, $Left);
 | |
|             } elsif ($git_hash{$Left}) {
 | |
| #print "A DIFF 3\n";
 | |
|                 # assume Right is a directory; tar the entire git archive at this hash
 | |
|                 $replacement_arg_list{$git_hash{$Left}}  = git_archive($Left, "");
 | |
|             } else {
 | |
| #print "A DIFF 4\n";
 | |
|                 # assume Left  is a directory; tar the entire git archive at this hash
 | |
|                 $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, "");
 | |
|             }
 | |
|         }
 | |
|     } else {
 | |
| #print "B COUNT\n";
 | |
|         foreach my $hash (sort keys %git_hash) {
 | |
|             $replacement_arg_list{$git_hash{$hash}} = git_archive($hash);
 | |
|         }
 | |
|     }
 | |
| # print "git_hash= "; print Dumper(\%git_hash);
 | |
| #print "repo_listing= "; print Dumper(\%repo_listing);
 | |
| 
 | |
|     # replace the input arg list with the new one
 | |
|     @{$ra_arg_list} = ();
 | |
|     foreach my $index (sort {$a <=> $b} keys %replacement_arg_list) {
 | |
|         push @{$ra_arg_list}, $replacement_arg_list{$index};
 | |
|     }
 | |
| 
 | |
| #print "ra_arg_list 2: @{$ra_arg_list}\n";
 | |
|     print "<- replace_git_hash_with_tarfile()\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub git_similarity_index {                   # {{{
 | |
|     my ($git_hash_Left    ,       # in
 | |
|         $git_hash_Right   ,       # in
 | |
|         $ra_left_files    ,       # out
 | |
|         $ra_right_files   ,       # out
 | |
|         $ra_git_similarity) = @_; # out
 | |
|     die "this option is not yet implemented";
 | |
|     print "-> git_similarity_index($git_hash_Left, $git_hash_Right)\n" if $opt_v > 2;
 | |
|     my $cmd = "git diff -M --name-status $git_hash_Left $git_hash_Right";
 | |
|     print  $cmd, "\n" if $opt_v;
 | |
|     open(GSIM, "$cmd |") or die "Unable to run $cmd  $!";
 | |
|     while (<GSIM>) {
 | |
|         print "git similarity> $_";
 | |
|     }
 | |
|     close(GSIM);
 | |
|     print "<- git_similarity_index\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub empty_tar {                              # {{{1
 | |
|     my ($Tarfh, $Tarfile);
 | |
|     if ($opt_sdir) {
 | |
|       File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
 | |
|       ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar');  # delete on exit
 | |
|     } else {
 | |
|       ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar');  # delete on exit
 | |
|     }
 | |
|     my $cmd = $ON_WINDOWS ? "type nul > $Tarfile" : "tar -cf $Tarfile -T /dev/null";
 | |
|     print  $cmd, "\n" if $opt_v;
 | |
|     system $cmd;
 | |
|     if (!can_read($Tarfile)) {
 | |
|         # not readable
 | |
|         die "Failed to create empty tarfile.";
 | |
|     }
 | |
| 
 | |
|     return $Tarfile;
 | |
| } # 1}}}
 | |
| sub git_archive {                            # {{{1
 | |
|     # Invoke 'git archive' as a system command to create a tar file
 | |
|     # using the given argument(s).
 | |
|     my ($A1, $A2) = @_;
 | |
|     print "-> git_archive($A1)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $args = undef;
 | |
|     my @File_Set = ( );
 | |
|     my $n_sets   = 1;
 | |
|     if (ref $A2 eq 'ARRAY') {
 | |
|         # Avoid "Argument list too long" for the git archive command
 | |
|         # by splitting the inputs into sets of 10,000 files (issue 273).
 | |
|         my $FILES_PER_ARCHIVE = 1_000;
 | |
|            $FILES_PER_ARCHIVE =   100 if $ON_WINDOWS; # github.com/AlDanial/cloc/issues/404
 | |
| 
 | |
|         my $n_files  = scalar(@{$A2});
 | |
|         $n_sets = $n_files/$FILES_PER_ARCHIVE;
 | |
|         $n_sets = 1 + int($n_sets) if $n_sets > int($n_sets);
 | |
|         $n_sets = 1 if !$n_sets;
 | |
|         foreach my $i (0..$n_sets-1) {
 | |
|             @{$File_Set[$i]} = ( );
 | |
|             my $start = $i*$FILES_PER_ARCHIVE;
 | |
|             my $end   = smaller(($i+1)*$FILES_PER_ARCHIVE, $n_files) - 1;
 | |
|             # Wrap each file name in single quotes to protect spaces
 | |
|             # and other odd characters.  File names that themselves have
 | |
|             # single quotes are instead wrapped in double quotes.  File
 | |
|             # names with both single and double quotes... jeez.
 | |
|             foreach my $fname (@{$A2}[$start .. $end]) {
 | |
|                 if      ($fname =~ /^".*?\\".*?"$/) {
 | |
|                     # git pre-handles filenames with double quotes by backslashing
 | |
|                     # each double quote then surrounding entire name in double
 | |
|                     # quotes; undo this otherwise archive command crashes
 | |
|                     $fname =~ s/\\"/"/g;
 | |
|                     $fname =~ s/^"(.*)"$/$1/;
 | |
|                 } elsif ($fname =~ /'/ or $ON_WINDOWS) {
 | |
|                     push @{$File_Set[$i]}, "\"$fname\"";
 | |
|                 } else {
 | |
|                     push @{$File_Set[$i]}, "'$fname'";
 | |
|                 }
 | |
|             }
 | |
|             unshift @{$File_Set[$i]}, "$A1 ";  # prepend git hash to beginning of list
 | |
| 
 | |
| ##xx#        # don't include \$ in the regex because git handles these correctly
 | |
| #            # to each word in @{$A2}[$start .. $end]: first backslash each
 | |
| #            # single quote, then wrap all entries in single quotes (#320)
 | |
| #            push @File_Set,
 | |
| #                 "$A1 " . join(" ", map {$_ =~ s/'/\'/g; $_ =~ s/^(.*)$/'$1'/g; $_}
 | |
| ##                "$A1 " . join(" ", map {$_ =~ s/([\s\(\)\[\]{}';\^\?])/\\$1/g; $_}
 | |
| #                              @{$A2}[$start .. $end]);
 | |
|         }
 | |
|     } else {
 | |
|         if (defined $A2) {
 | |
|             push @{$File_Set[0]}, "$A1 $A2";
 | |
|         } else {
 | |
|             push @{$File_Set[0]}, "$A1";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my $files_this_commit = join(" ", @{$File_Set[0]});
 | |
|     print "   git_archive(file_set[0]=$files_this_commit)\n" if $opt_v > 2;
 | |
|     my ($Tarfh, $Tarfile);
 | |
|     if ($opt_sdir) {
 | |
|       File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
 | |
|       ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => '.tar');  # delete on exit
 | |
|     } else {
 | |
|       ($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => '.tar');  # delete on exit
 | |
|     }
 | |
|     my $cmd = "git archive -o $Tarfile $files_this_commit";
 | |
|     print  $cmd, "\n" if $opt_v;
 | |
|     system $cmd;
 | |
|     if (!can_read($Tarfile) or !get_size($Tarfile)) {
 | |
|         # not readable, or zero sized
 | |
|         die "Failed to create tarfile of files from git.";
 | |
|     }
 | |
|     if ($n_sets > 1) {
 | |
|         if ($ON_WINDOWS) {
 | |
|             my @tar_files = ( $Tarfile );
 | |
|             my $start_dir = cwd;
 | |
|             foreach my $i (1..$n_sets-1) {
 | |
|                 my $fname = sprintf "%s_extra_%08d", $Tarfile, $i;
 | |
|                 my $files_this_commit = join(" ", @{$File_Set[$i]});
 | |
|                 my $cmd = "git archive -o $fname $files_this_commit";
 | |
|                 print  $cmd, "\n" if $opt_v;
 | |
|                 system $cmd;
 | |
|                 push @tar_files, $fname;
 | |
|             }
 | |
|             # Windows tar can't combine tar files so expand
 | |
|             # them all to one directory then re-tar
 | |
|             my $extract_dir = tempdir( CLEANUP => 0 );  # 1 = delete on exit
 | |
|             chdir "$extract_dir";
 | |
|             foreach my $T (@tar_files) {
 | |
|                 next unless is_file($T) and get_size($T);
 | |
|                 my $cmd = "tar -x -f \"$T\"";
 | |
|                 print  $cmd, "\n" if $opt_v;
 | |
|                 system $cmd;
 | |
|                 unlink "$T";
 | |
|             }
 | |
|             chdir "..";
 | |
|             $Tarfile .= ".final.tar";
 | |
|             my $cmd = "tar -c -f \"${Tarfile}\" \"$extract_dir\"";
 | |
|             print  $cmd, "\n" if $opt_v;
 | |
|             system $cmd;
 | |
|             chdir "$start_dir";
 | |
|         } else {
 | |
|             foreach my $i (1..$n_sets-1) {
 | |
|                 my $files_this_commit = join(" ", @{$File_Set[$i]});
 | |
|                 my $cmd = "git archive -o ${Tarfile}_extra $files_this_commit";
 | |
|                 print  $cmd, "\n" if $opt_v;
 | |
|                 system $cmd;
 | |
|                 # and merge into the first one
 | |
|                 $cmd = "tar -A -f ${Tarfile} ${Tarfile}_extra";
 | |
|                 print  $cmd, "\n" if $opt_v;
 | |
|                 system $cmd;
 | |
|             }
 | |
|             unlink "${Tarfile}_extra";
 | |
|         }
 | |
|     }
 | |
|     print "<- git_archive() made $Tarfile\n" if $opt_v > 2;
 | |
|     return $Tarfile
 | |
| } # 1}}}
 | |
| sub smaller {                                # {{{1
 | |
|     my( $a, $b ) = @_;
 | |
|     return $a < $b ? $a : $b;
 | |
| } # 1}}}
 | |
| sub lower_on_Windows {                       # {{{1
 | |
|     # If on Unix(-like), do nothing, just return the input.
 | |
|     # If on Windows, return a lowercase version of the file
 | |
|     # and also update %upper_lower_map with this new entry.
 | |
|     # Needed in make_file_list() because the full file list
 | |
|     # isn't known until the end of that routine--where
 | |
|     # %upper_lower_map is ordinarily populated.
 | |
|     my ($path,) = @_;
 | |
|     return $path unless $ON_WINDOWS;
 | |
|     my $lower = lc $path;
 | |
|     $upper_lower_map{$lower} = $path;
 | |
|     return $lower;
 | |
| }
 | |
| # }}}
 | |
| sub make_file_list {                         # {{{1
 | |
|     my ($ra_arg_list,  # in   file and/or directory names to examine
 | |
|         $iteration  ,  # in   0 if only called once, 1 or 2 if twice for diff
 | |
|         $rh_Err     ,  # in   hash of error codes
 | |
|         $raa_errors ,  # out  errors encountered
 | |
|         $rh_ignored ,  # out  files not recognized as computer languages
 | |
|         ) = @_;
 | |
|     print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
 | |
| 
 | |
|     my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | |
|     my ($fh, $filename);
 | |
|     if ($opt_categorized) {
 | |
|         if ($iteration) {
 | |
|             # am being called twice for diff of Left and Right
 | |
|             my $ext = $iteration == 1 ? "L" : "R";
 | |
|             $filename = $opt_categorized . "-$ext";
 | |
|         } else {
 | |
|             $filename = $opt_categorized;
 | |
|         }
 | |
|         $fh = open_file('+>', $filename, 1);  # open for read/write
 | |
|         die "Unable to write to $filename:  $!\n" unless defined $fh;
 | |
|     } elsif ($opt_sdir) {
 | |
|         # write to the user-defined scratch directory
 | |
|         ++$TEMP_OFF;
 | |
|         my $scr_dir = "$opt_sdir/$TEMP_OFF";
 | |
|         File::Path::mkpath($scr_dir) unless is_dir($scr_dir);
 | |
|         $filename = $scr_dir . '/cloc_file_list.txt';
 | |
|         $fh = open_file('+>', $filename, 1);  # open for read/write
 | |
|         die "Unable to write to $filename:  $!\n" unless defined $fh;
 | |
|     } else {
 | |
|         # let File::Temp create a suitable temporary file
 | |
|         ($fh, $filename) = tempfile(UNLINK => 1);  # delete file on exit
 | |
|         print "Using temp file list [$filename]\n" if $opt_v;
 | |
|     }
 | |
| 
 | |
|     my @dir_list = ();
 | |
|     foreach my $file_or_dir (@{$ra_arg_list}) {
 | |
|         my $size_in_bytes = 0;
 | |
|         my $F = lower_on_Windows($file_or_dir);
 | |
|         my $ul_F = $ON_WINDOWS ? $upper_lower_map{$F} : $F;
 | |
|         if (!can_read($file_or_dir)) {
 | |
|             push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
 | |
|             next;
 | |
|         }
 | |
|         if (is_file($file_or_dir)) {
 | |
|             if (!get_size($file_or_dir)) {   # 0 sized file, named pipe, socket
 | |
|                 $rh_ignored->{$F} = 'zero sized file';
 | |
|                 next;
 | |
|             } elsif (is_binary($file_or_dir) and !$opt_read_binary_files) {
 | |
|                 # avoid binary files unless user insists on reading them
 | |
|                 if ($opt_unicode) {
 | |
|                     # only ignore if not a Unicode file w/trivial
 | |
|                     # ASCII transliteration
 | |
|                     if (!unicode_file($file_or_dir)) {
 | |
|                         $rh_ignored->{$ul_F} = 'binary file';
 | |
|                         next;
 | |
|                     }
 | |
|                 } else {
 | |
|                     $rh_ignored->{$ul_F} = 'binary file';
 | |
|                     next;
 | |
|                 }
 | |
|             }
 | |
|             push @file_list, "$file_or_dir";
 | |
|         } elsif (is_dir($file_or_dir)) {
 | |
|             push @dir_list, $file_or_dir;
 | |
|         } else {
 | |
|             push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $F];
 | |
|             $rh_ignored->{$F} = 'not file, not directory';
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # apply exclusion rules to file names passed in on the command line
 | |
|     my @new_file_list = ();
 | |
|     foreach my $File (@file_list) {
 | |
|         my ($volume, $directories, $filename) = File::Spec->splitpath( $File );
 | |
|         my $ignore_this_file = 0;
 | |
|         foreach my $Sub_Dir ( File::Spec->splitdir($directories) ) {
 | |
|             my $SD = lower_on_Windows($Sub_Dir);
 | |
|             if ($Exclude_Dir{$Sub_Dir}) {
 | |
|                 $Ignored{$SD} = "($File) --exclude-dir=$Sub_Dir";
 | |
|                 $ignore_this_file = 1;
 | |
|                 last;
 | |
|             }
 | |
|         }
 | |
|         push @new_file_list, $File unless $ignore_this_file;
 | |
|     }
 | |
|     @file_list = @new_file_list;
 | |
|     foreach my $dir (@dir_list) {
 | |
|         my $D = lower_on_Windows($dir);
 | |
| #print "make_file_list dir=$dir  Exclude_Dir{$dir}=$Exclude_Dir{$dir}\n";
 | |
|         # populates global variable @file_list
 | |
|         if ($Exclude_Dir{$dir}) {
 | |
|             $Ignored{$D} = "--exclude-dir=$Exclude_Dir{$dir}";
 | |
|             next;
 | |
|         }
 | |
|         if ($opt_no_recurse) {
 | |
|             if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|                 my $d = Win32::LongPath->new();
 | |
|                 $d->opendirL($dir);
 | |
|                 foreach my $entry ($d->readdirL()) {
 | |
|                     my $F = "$dir/$entry";
 | |
|                     push @file_list, $F if is_file($F);
 | |
|                 }
 | |
|                 $d->closedirL();
 | |
|             } else {
 | |
|                 opendir(DIR, $dir);
 | |
|                 push @file_list, grep(is_file($_), readdir(DIR));
 | |
|                 closedir(DIR);
 | |
|             }
 | |
|         } else {
 | |
|             find({wanted     => \&files            ,
 | |
|                   preprocess => \&find_preprocessor,
 | |
|                   follow     =>  $opt_follow_links }, $dir);
 | |
|         }
 | |
|     }
 | |
|     if ($opt_follow_links) {
 | |
|         # giving { 'follow' => 1 } to find() makes it skip the
 | |
|         # call to find_preprocessor() so have to call this manually
 | |
|         @file_list = manual_find_preprocessor(@file_list);
 | |
|     }
 | |
| 
 | |
|     # there's a possibility of file duplication if user provided a list
 | |
|     # file or --vcs command that returns directory names; squash these
 | |
|     my %unique_file_list = map { $_ => 1 } @file_list;
 | |
|     @file_list = sort keys %unique_file_list;
 | |
| 
 | |
|     $nFiles_Found = scalar @file_list;
 | |
|     printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
 | |
|     write_file($opt_found, {}, sort @file_list) if $opt_found;
 | |
| 
 | |
|     my $nFiles_Categorized = 0;
 | |
| 
 | |
|     foreach my $file (@file_list) {
 | |
|         my $F = lower_on_Windows($file);
 | |
|         if ($ON_WINDOWS) {
 | |
|             (my $lc = lc $file) =~ s{\\}{/}g;
 | |
|             $upper_lower_map{$lc} = $file;
 | |
|             $file = $lc;
 | |
|         }
 | |
|         printf "classifying $file\n" if $opt_v > 2;
 | |
| 
 | |
|         my $basename = basename $file;
 | |
|         if ($Not_Code_Filename{$basename}) {
 | |
|             $rh_ignored->{$F} = "listed in " . '$' .
 | |
|                 "Not_Code_Filename{$basename}";
 | |
|             next;
 | |
|         } elsif ($basename =~ m{~$}) {
 | |
|             $rh_ignored->{$F} = "temporary editor file";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $size_in_bytes;
 | |
|         if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|             my $stats = statL($file);
 | |
|             $size_in_bytes = $stats->{size} if defined $stats;
 | |
|         } else {
 | |
|             $size_in_bytes = (stat $file)[7];
 | |
|         }
 | |
|         my $language      = "";
 | |
|         if ($All_One_Language) {
 | |
|             # user over-rode auto-language detection by using
 | |
|             # --force-lang with just a language name (no extension)
 | |
|             $language      = $All_One_Language;
 | |
|         } else {
 | |
|             $language      = classify_file($file      ,
 | |
|                                            $rh_Err    ,
 | |
|                                            $raa_errors,
 | |
|                                            $rh_ignored);
 | |
|         }
 | |
|         if (!defined $size_in_bytes) {
 | |
|             $rh_ignored->{$F} = "no longer readable";
 | |
|             next;
 | |
|         } elsif (!defined $language) {
 | |
|             $rh_ignored->{$F} = "unable to associate with a language";
 | |
|             next;
 | |
|         } elsif ($language eq "(unknown)") {
 | |
|             # entry should already be in %{$rh_ignored}
 | |
|             next;
 | |
|         }
 | |
|         printf $fh "%d%s%s%s%s\n", $size_in_bytes, $separator,
 | |
|                                    $language, $separator, $file;
 | |
|         ++$nFiles_Categorized;
 | |
|         #printf "classified %d files\n", $nFiles_Categorized
 | |
|         #    unless (!$opt_progress_rate or
 | |
|         #            ($nFiles_Categorized % $opt_progress_rate));
 | |
|     }
 | |
|     printf "classified %d files\r", $nFiles_Categorized
 | |
|         if !$opt_quiet and $nFiles_Categorized > 1;
 | |
|     print "<- make_file_list()\n" if $opt_v > 2;
 | |
| 
 | |
|     return $fh;   # handle to the file containing the list of files to process
 | |
| }  # 1}}}
 | |
| sub invoke_generator {                       # {{{1
 | |
|     my ($generator, $ra_user_inputs) = @_;
 | |
|     # If user provided file/directory inputs, only return
 | |
|     # generated files that are in user's request.
 | |
|     # Populates global variable %Ignored.
 | |
|     print "-> invoke_generator($generator, @{$ra_user_inputs})\n" if $opt_v > 2;
 | |
|     my $start_dir = cwd();
 | |
|     my @dir_list  = ();
 | |
|     my @file_list = ();
 | |
| 
 | |
|     if (!@{$ra_user_inputs}) {
 | |
|         # input must be a generator command, ie "find -type f -name '*.c'"
 | |
|         # issued from the cwd
 | |
|         push @dir_list, ".";
 | |
|     }
 | |
|     foreach my $file_dir (@{$ra_user_inputs}) {
 | |
|         if (is_dir($file_dir)) {
 | |
|             push @dir_list, $file_dir;
 | |
|         } elsif (is_file($file_dir)) {
 | |
|             push @file_list, $file_dir;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my @files = ();
 | |
|     foreach my $work_dir (@dir_list) {
 | |
|         if ($work_dir ne $start_dir) {
 | |
|             chdir $work_dir or die "Failed to chdir to $work_dir: $!";
 | |
|         }
 | |
|         open(FH, "$generator |") or die "Failed to pipe $generator: $!";
 | |
|         while(<FH>) {
 | |
|             chomp;
 | |
|             my $F = "$work_dir/$_";
 | |
|             print "VCS input:  $F\n" if $opt_v >= 2;
 | |
|             if (!@file_list) {
 | |
|                 # no files given on the command line; accept all
 | |
|                 push @files, $F;
 | |
|             } else {
 | |
|                 # is this file desired?
 | |
|                 my $want_this_one = 0;
 | |
|                 foreach my $file (@file_list) {
 | |
|                     $file =~ s{\\}{/}g if $ON_WINDOWS;
 | |
|                     if (/^$file/) {
 | |
|                         $want_this_one = 1;
 | |
|                         last;
 | |
|                     }
 | |
|                 }
 | |
|                 push @files, $F if $want_this_one;
 | |
|             }
 | |
|         }
 | |
|         close(FH);
 | |
|         if ($work_dir ne $start_dir) {
 | |
|             chdir $start_dir or die "Failed to chdir to $start_dir: $!";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # apply match/not-match file/dir filters to the list so far
 | |
|     my @post_filter = ();
 | |
|     foreach my $F (@files) {
 | |
|         if ($opt_match_f) {
 | |
|             push @post_filter, $F if basename($F) =~ m{$opt_match_f};
 | |
|             next;
 | |
|         }
 | |
|         if ($opt_match_d) {
 | |
|             push @post_filter, $F if $F =~ m{$opt_match_d};
 | |
|             next;
 | |
|         }
 | |
|         if (@opt_not_match_d) {
 | |
|             my $rule;
 | |
|             if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_d)) {
 | |
|                 $Ignored{$F} = "--not-match-d=$rule";
 | |
|                 next;
 | |
|             } elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_d)) {
 | |
|                 $Ignored{$F} = "--not-match-d (basename) =$rule";
 | |
|                 next;
 | |
|             }
 | |
|         }
 | |
|         if (@opt_not_match_f) {
 | |
|             my $rule;
 | |
|             if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_f)) {
 | |
|                 $Ignored{$F} = "--not-match-f=$rule";
 | |
|                 next;
 | |
|             } elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_f)) {
 | |
|                 $Ignored{$F} = "--not-match-f (basename) =$rule";
 | |
|                 next;
 | |
|             }
 | |
|         }
 | |
|         my $nBytes = get_size($F);
 | |
|         if (!$nBytes) {
 | |
|             $Ignored{$F} = 'zero sized file';
 | |
|             printf "files(%s)  zero size\n", $F if $opt_v > 5;
 | |
|         }
 | |
|         next unless $nBytes;
 | |
|         if ($nBytes > $opt_max_file_size*1024**2) {
 | |
|             $Ignored{$F} = "file size of " .
 | |
|                 $nBytes/1024**2 . " MB exceeds max file size of " .
 | |
|                 "$opt_max_file_size MB";
 | |
|             printf "file(%s)  exceeds $opt_max_file_size MB\n",
 | |
|                 $F if $opt_v > 5;
 | |
|             next;
 | |
|         }
 | |
|         my $is_bin = is_binary($F);
 | |
|         printf "files(%s)  size=%d  -B=%d\n",
 | |
|             $F, $nBytes, $is_bin if $opt_v > 5;
 | |
|         $is_bin = 0 if $opt_unicode and unicode_file($_);
 | |
|         $is_bin = 0 if $opt_read_binary_files;
 | |
|         next if $is_bin;
 | |
|         push @post_filter, $F;
 | |
|     }
 | |
|     print "<- invoke_generator\n" if $opt_v > 2;
 | |
|     return @post_filter;
 | |
| } # 1}}}
 | |
| sub any_match {                              # {{{1
 | |
|     my ($string, $entire, $rs_matched_pattern, @patterns) = @_;
 | |
|     print "-> any_match($string, $entire)\n" if $opt_v > 2;
 | |
|     foreach my $pattern (@patterns) {
 | |
|         if ($entire) {
 | |
|             if ($string =~ m{^${pattern}$}) {
 | |
|                 ${$rs_matched_pattern} = $pattern;
 | |
|                 print "<- any_match(1)\n" if $opt_v > 2;
 | |
|                 return 1;
 | |
|             }
 | |
|         } else {
 | |
|             if ($string =~ m{$pattern}) {
 | |
|                 ${$rs_matched_pattern} = $pattern;
 | |
|                 print "<- any_match(1)\n" if $opt_v > 2;
 | |
|                 return 1;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     print "<- any_match(0)\n" if $opt_v > 2;
 | |
|     return 0;
 | |
| }
 | |
| # }}}
 | |
| sub remove_duplicate_files {                 # {{{1
 | |
|     my ($fh                   , # in
 | |
|         $rh_Language          , # out
 | |
|         $rh_unique_source_file, # out
 | |
|         $rh_Err               , # in
 | |
|         $raa_errors           , # out  errors encountered
 | |
|         $rh_ignored           , # out
 | |
|         ) = @_;
 | |
| 
 | |
|     # Check for duplicate files by comparing file sizes.
 | |
|     # Where files are equally sized, compare their MD5 checksums.
 | |
|     print "-> remove_duplicate_files\n" if $opt_v > 2;
 | |
| 
 | |
|     my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
 | |
|     my $n = 0;
 | |
|     my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
 | |
|     seek($fh, 0, 0); # rewind to beginning of the temp file
 | |
|     while (<$fh>) {
 | |
|         ++$n;
 | |
|         my ($size_in_bytes, $language, $file) = split(/\Q$separator\E/, $_, 3);
 | |
|         if (!defined($size_in_bytes) or
 | |
|             !defined($language)      or
 | |
|             !defined($file)) {
 | |
|             print "-> remove_duplicate_files skipping error line [$_]\n"
 | |
|                 if $opt_v;
 | |
|             next;
 | |
|         }
 | |
|         chomp($file);
 | |
|         $file =~ s{\\}{/}g if $ON_WINDOWS;
 | |
|         $rh_Language->{$file} = $language;
 | |
|         push @{$files_by_size{$size_in_bytes}}, $file;
 | |
|         if ($opt_skip_uniqueness) {
 | |
|             $rh_unique_source_file->{$file} = 1;
 | |
|         }
 | |
|     }
 | |
|     return if $opt_skip_uniqueness;
 | |
|     if ($opt_progress_rate and ($n > $opt_progress_rate)) {
 | |
|         printf "Duplicate file check %d files (%d known unique)\r",
 | |
|             $n, scalar keys %files_by_size;
 | |
|     }
 | |
|     $n = 0;
 | |
|     foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
 | |
|         ++$n;
 | |
|         printf "Unique: %8d files                                          \r",
 | |
|             $n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
 | |
|         if (scalar @{$files_by_size{$bytes}} == 1) {
 | |
|             # only one file is this big; must be unique
 | |
|             $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
 | |
|             next;
 | |
|         } else {
 | |
| #print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
 | |
|             # Files in the list @{$files_by_size{$bytes} all are
 | |
|             # $bytes long.  Sort the list by file basename.
 | |
| 
 | |
|           # # sorting on basename causes repeatability problems
 | |
|           # # if the basename is not unique (eg "includeA/x.h"
 | |
|           # # and "includeB/x.h".  Instead, sort on full path.
 | |
|           # # Ref bug #114.
 | |
|           # my @sorted_bn = ();
 | |
|           # my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
 | |
|           # foreach my $F (sort keys %BN) {
 | |
|           #     push @sorted_bn, $BN{$F};
 | |
|           # }
 | |
| 
 | |
|             my @sorted_bn = sort @{$files_by_size{$bytes}};
 | |
| 
 | |
|             foreach my $F (different_files(\@sorted_bn  ,
 | |
|                                             $rh_Err     ,
 | |
|                                             $raa_errors ,
 | |
|                                             $rh_ignored ) ) {
 | |
|                 $rh_unique_source_file->{$F} = 1;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     print "<- remove_duplicate_files\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub manual_find_preprocessor {               # {{{1
 | |
|     # When running with --follow-links, find_preprocessor() is not
 | |
|     # called by find().  Have to do it manually.  Inputs here
 | |
|     # are only files, which differs from find_preprocessor() which
 | |
|     # gets directories too.
 | |
|     # Reads global variable %Exclude_Dir.
 | |
|     # Populates global variable %Ignored.
 | |
|     # Reject files/directories in cwd which are in the exclude list.
 | |
|     print "-> manual_find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
 | |
|     my @ok = ();
 | |
| 
 | |
|     foreach my $File (@_) {  # pure file or directory name, no separators
 | |
|         my $Dir = dirname($File);
 | |
|         my $got_exclude_dir = 0;
 | |
|         foreach my $d (File::Spec->splitdir( $Dir )) {
 | |
|             # tests/inputs/issues/407/level2/level/Test/level2 ->
 | |
|             # $d iterates over tests, inputs, issues, 407,
 | |
|             #                  level2, level, Test, level2
 | |
|             # check every item against %Exclude_Dir
 | |
|             if ($Exclude_Dir{$d}) {
 | |
|                 $got_exclude_dir = $d;
 | |
|                 last;
 | |
|             }
 | |
|         }
 | |
|         if ($got_exclude_dir) {
 | |
|             $Ignored{$File} = "--exclude-dir=$Exclude_Dir{$got_exclude_dir}";
 | |
| #print "ignoring $File\n";
 | |
|         } else {
 | |
|             if (@opt_not_match_d) {
 | |
|                 my $rule;
 | |
|                 if ($opt_fullpath) {
 | |
|                     if (any_match($Dir, 1, \$rule, @opt_not_match_d)) {
 | |
|                         $Ignored{$File} = "--not-match-d=$rule";
 | |
| #print "matched fullpath\n"
 | |
|                     } else {
 | |
|                         push @ok, $File;
 | |
|                     }
 | |
|                 } elsif (any_match(basename($Dir), 0, \$rule, @opt_not_match_d)) {
 | |
|                     $Ignored{$File} = "--not-match-d=$rule";
 | |
| #print "matched partial\n"
 | |
|                 } else {
 | |
|                     push @ok, $File;
 | |
|                 }
 | |
|             } else {
 | |
|                 push @ok, $File;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- manual_find_preprocessor(@ok)\n" if $opt_v > 2;
 | |
|     return @ok;
 | |
| } # 1}}}
 | |
| sub find_preprocessor {                      # {{{1
 | |
|     # invoked by File::Find's find() each time it enters a new directory
 | |
|     # Reads global variable %Exclude_Dir.
 | |
|     # Populates global variable %Ignored.
 | |
|     # Reject files/directories in cwd which are in the exclude list.
 | |
|     print "-> find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
 | |
|     my @ok = ();
 | |
| 
 | |
|     foreach my $F_or_D (@_) {  # pure file or directory name, no separators
 | |
|         next if $F_or_D =~ /^\.{1,2}$/;  # skip .  and  ..
 | |
|         if ($Exclude_Dir{$F_or_D}) {
 | |
|             $Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
 | |
|         } else {
 | |
| #printf "  F_or_D=%-20s File::Find::name=%s\n", $F_or_D, $File::Find::name;
 | |
|             if (@opt_not_match_d) {
 | |
|                 my $rule;
 | |
|                 if ($opt_fullpath) {
 | |
|                     if (any_match($File::Find::name, 0, \$rule, @opt_not_match_d)) {
 | |
|                         $Ignored{$File::Find::name} = "--not-match-d=$rule";
 | |
|                     } else {
 | |
|                         push @ok, $F_or_D;
 | |
|                     }
 | |
|                 } elsif (!is_dir($F_or_D) and
 | |
|                          any_match($File::Find::name, 0, \$rule,
 | |
|                                    @opt_not_match_d)) {
 | |
|                     $Ignored{$File::Find::name} = "--not-match-d (basename) =$rule";
 | |
|                 } else {
 | |
|                     push @ok, $F_or_D;
 | |
|                 }
 | |
|             } else {
 | |
|                 push @ok, $F_or_D;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- find_preprocessor(@ok)\n" if $opt_v > 2;
 | |
|     return @ok;
 | |
| } # 1}}}
 | |
| sub files {                                  # {{{1
 | |
|     # invoked by File::Find's find()   Populates global variable @file_list.
 | |
|     # See also find_preprocessor() which prunes undesired directories.
 | |
| 
 | |
| #   my $Dir = fastcwd();         # fully qualified path -- problematic
 | |
|     my $Dir = $File::Find::dir;  # path anchored to cloc's cwd
 | |
|     my $rule;
 | |
|     if ($opt_fullpath) {
 | |
|         # look at as much of the path as is known
 | |
|         if ($opt_match_f    ) {
 | |
|             return unless $File::Find::name =~ m{$opt_match_f};
 | |
|         }
 | |
|         if (@opt_not_match_f) {
 | |
|             return if any_match($File::Find::name, 0, \$rule, @opt_not_match_f);
 | |
|         }
 | |
|     } else {
 | |
|         # only look at the basename
 | |
|         if ($opt_match_f    ) { return unless /$opt_match_f/;     }
 | |
|         if (@opt_not_match_f) { return if     any_match($_, 0, \$rule, @opt_not_match_f)}
 | |
|     }
 | |
|     if ($opt_match_d) {
 | |
|         return unless "$Dir/" =~ m{$opt_match_d} or $Dir =~ m{$opt_match_d};
 | |
|     }
 | |
| 
 | |
|     my $nBytes = get_size($_);
 | |
|     if (!$nBytes and is_file($File::Find::name)) {
 | |
|         $Ignored{$File::Find::name} = 'zero sized file';
 | |
|         printf "files(%s)  zero size\n", $File::Find::name if $opt_v > 5;
 | |
|     }
 | |
|     return unless $nBytes  ; # attempting other tests w/pipe or socket will hang
 | |
|     if ($nBytes > $opt_max_file_size*1024**2) {
 | |
|         $Ignored{$File::Find::name} = "file size of " .
 | |
|             $nBytes/1024**2 . " MB exceeds max file size of " .
 | |
|             "$opt_max_file_size MB";
 | |
|         printf "file(%s)  exceeds $opt_max_file_size MB\n",
 | |
|             $File::Find::name if $opt_v > 5;
 | |
|         return;
 | |
|     }
 | |
|     my $is_dir = is_dir($_);
 | |
|     my $is_bin = is_binary($_);
 | |
|     printf "files(%s)  size=%d is_dir=%d  -B=%d\n",
 | |
|         $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
 | |
|     $is_bin = 0 if $opt_unicode and unicode_file($_);
 | |
|     $is_bin = 0 if $opt_read_binary_files;
 | |
|     if ($is_bin and !$is_dir) {
 | |
|         $Ignored{$File::Find::name} = "binary file";
 | |
|         printf "files(%s)  binary file\n", $File::Find::name if $opt_v > 5;
 | |
|     }
 | |
|     return if $is_dir or $is_bin;
 | |
|     ++$nFiles_Found;
 | |
|     printf "%8d files\r", $nFiles_Found
 | |
|         unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
 | |
|     push @file_list, $File::Find::name;
 | |
| } # 1}}}
 | |
| sub archive_files {                          # {{{1
 | |
|     # invoked by File::Find's find()  Populates global variable @binary_archive
 | |
|     foreach my $ext (keys %Known_Binary_Archives) {
 | |
|         push @binary_archive, $File::Find::name
 | |
|             if $File::Find::name =~ m{$ext$};
 | |
|     }
 | |
| } # 1}}}
 | |
| sub open_file {                              # {{{1
 | |
|     # portable method to open a file. On Windows this uses Win32::LongPath to
 | |
|     # allow reading/writing files past the 255 char path length limit. When on
 | |
|     # other operating systems, $use_new_file can be used to specify opening a
 | |
|     # file with `new IO::File` instead of `open`. Note: `openL` doesn't support
 | |
|     # the C-like fopen modes ("w", "r+", etc.), it only supports Perl mode
 | |
|     # strings (">", "+<", etc.). So be sure to only use Perl mode strings to
 | |
|     # ensure compatibility. Additionally, openL doesn't handle pipe modes; if
 | |
|     # you need to open a pipe/STDIN/STDOUT, use the native `open` function.
 | |
|     my ($mode,         # Perl file mode; can not be C-style file mode
 | |
|         $filename,     # filename to open
 | |
|         $use_new_file, # whether to use `new IO::File` or `open` when not using Win32::LongPath
 | |
|         ) = @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         my $file = undef;
 | |
|         openL(\$file, $mode, $filename);
 | |
|         return $file;
 | |
|     } elsif ($use_new_file) {
 | |
|         return new IO::File $filename, $mode;
 | |
|     }
 | |
|     my $file = undef;
 | |
|     open($file, $mode, $filename);
 | |
|     return $file;
 | |
| } # 1}}}
 | |
| sub unlink_file {                            # {{{1
 | |
|     # portable method to unlink a file. On Windows this uses Win32::LongPath to
 | |
|     # allow unlinking files past the 255 char path length limit. Otherwise, the
 | |
|     # native `unlink` will be used.
 | |
|     my $filename = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         return unlinkL($filename);
 | |
|     }
 | |
|     return unlink $filename;
 | |
| } # 1}}}
 | |
| sub is_binary {                              # {{{1
 | |
|     # portable method to test if item is a binary file. For Windows,
 | |
|     # Win32::LongPath doesn't provide a testL option for -B, but -B
 | |
|     # accepts a filehandle which does work with files opened with openL.
 | |
|     my $item = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         my $IN = open_file('<', $item, 0);
 | |
|         if (defined $IN) {
 | |
|             my $res = -B $IN;
 | |
|             close($IN);
 | |
|             return $res;
 | |
|         }
 | |
|         return;
 | |
|     }
 | |
|     return (-B $item);
 | |
| } # 1}}}
 | |
| sub can_read {                               # {{{1
 | |
|     # portable method to test if item can be read
 | |
|     my $item = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         return testL('r', $item);
 | |
|     }
 | |
|     return (-r $item);
 | |
| } # 1}}}
 | |
| sub get_size {                               # {{{1
 | |
|     # portable method to get size in bytes of a file
 | |
|     my $filename = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         return testL('s', $filename);
 | |
|     }
 | |
|     return (-s $filename);
 | |
| } # 1}}}
 | |
| sub is_file {                                # {{{1
 | |
|     # portable method to test if item is a file
 | |
|     # (-f doesn't work in ActiveState Perl on Windows)
 | |
|     my $item = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         return testL('f', $item);
 | |
|     }
 | |
|     return (-f $item);
 | |
| 
 | |
| #     Was:
 | |
| ####if ($ON_WINDOWS) {
 | |
| ####    my $mode = (stat $item)[2];
 | |
| ####       $mode = 0 unless $mode;
 | |
| ####    if ($mode & 0100000) { return 1; }
 | |
| ####    else                 { return 0; }
 | |
| ####} else {
 | |
| ####    return (-f $item);  # works on Unix, Linux, CygWin, z/OS
 | |
| ####}
 | |
| } # 1}}}
 | |
| sub is_dir {                                 # {{{1
 | |
|     my $item = shift @_;
 | |
|     if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
 | |
|         return testL('d', $item);
 | |
|     }
 | |
|     return (-d $item); # should work everywhere now (July 2017)
 | |
| 
 | |
| #     Was:
 | |
| ##### portable method to test if item is a directory
 | |
| ##### (-d doesn't work in older versions of ActiveState Perl on Windows)
 | |
| 
 | |
| ####if ($ON_WINDOWS) {
 | |
| ####    my $mode = (stat $item)[2];
 | |
| ####       $mode = 0 unless $mode;
 | |
| ####    if ($mode & 0040000) { return 1; }
 | |
| ####    else                 { return 0; }
 | |
| ####} else {
 | |
| ####    return (-d $item);  # works on Unix, Linux, CygWin, z/OS
 | |
| ####}
 | |
| } # 1}}}
 | |
| sub is_excluded {                            # {{{1
 | |
|     my ($file       , # in
 | |
|         $excluded   , # in   hash of excluded directories
 | |
|        ) = @_;
 | |
|     my($filename, $filepath, $suffix) = fileparse($file);
 | |
|     foreach my $path (sort keys %{$excluded}) {
 | |
|         return 1 if ($filepath =~ m{^\Q$path\E/}i);
 | |
|     }
 | |
| } # 1}}}
 | |
| sub classify_file {                          # {{{1
 | |
|     my ($full_file   , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rh_ignored  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> classify_file($full_file)\n" if $opt_v > 2;
 | |
|     my $language = "(unknown)";
 | |
| 
 | |
|     if (basename($full_file) eq "-" && defined $opt_stdin_name) {
 | |
|        $full_file = $opt_stdin_name;
 | |
|     }
 | |
| 
 | |
|     my $look_at_first_line = 0;
 | |
|     my $file = basename $full_file;
 | |
|     if ($opt_autoconf and $file =~ /\.in$/) {
 | |
|        $file =~ s/\.in$//;
 | |
|     }
 | |
|     return $language if $Not_Code_Filename{$file}; # (unknown)
 | |
|     return $language if $file =~ m{~$}; # a temp edit file (unknown)
 | |
|     if (defined $Language_by_File{$file}) {
 | |
|         if      ($Language_by_File{$file} eq "Ant/XML") {
 | |
|             return Ant_or_XML(  $full_file, $rh_Err, $raa_errors);
 | |
|         } elsif ($Language_by_File{$file} eq "Maven/XML") {
 | |
|             return Maven_or_XML($full_file, $rh_Err, $raa_errors);
 | |
|         } else {
 | |
|             return $Language_by_File{$file};
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ($file =~ /\.([^\.]+)$/) { # has an extension
 | |
|       print "$full_file extension=[$1]\n" if $opt_v > 2;
 | |
|       my $extension = $1;
 | |
|          # Windows file names are case insensitive so map
 | |
|          # all extensions to lowercase there.
 | |
|          $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | |
|       my @extension_list = ( $extension );
 | |
|       if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
 | |
|           my $extension = $1;
 | |
|           $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | |
|           unshift @extension_list, $extension;  # examine double ext first
 | |
|       }
 | |
|       if ($file =~ /\.([^\.]+\.[^\.]+\.[^\.]+)$/) { # has a triple extension
 | |
|           my $extension = $1;
 | |
|           $extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
 | |
|           unshift @extension_list, $extension;  # examine triple ext first
 | |
|       }
 | |
|       foreach my $extension (@extension_list) {
 | |
|         if ($Not_Code_Extension{$extension} and
 | |
|            !$Forced_Extension{$extension}) {
 | |
|            # If .1 (for example) is an extension that would ordinarily be
 | |
|            # ignored but the user has insisted this be counted with the
 | |
|            # --force-lang option, then go ahead and count it.
 | |
|             $rh_ignored->{$full_file} =
 | |
|                 'listed in $Not_Code_Extension{' . $extension . '}';
 | |
|             return $language;
 | |
|         }
 | |
|         # handle extension collisions
 | |
|         if (defined $Language_by_Extension{$extension}) {
 | |
|             if ($Language_by_Extension{$extension} eq
 | |
|                 'MATLAB/Mathematica/Objective-C/MUMPS/Mercury') {
 | |
|                 my $lang_M_or_O = "";
 | |
|                 matlab_or_objective_C($full_file ,
 | |
|                                       $rh_Err    ,
 | |
|                                       $raa_errors,
 | |
|                                      \$lang_M_or_O);
 | |
|                 if ($lang_M_or_O) {
 | |
|                     return $lang_M_or_O;
 | |
|                 } else { # an error happened in matlab_or_objective_C()
 | |
|                     $rh_ignored->{$full_file} =
 | |
|                         'failure in matlab_or_objective_C($full_file)';
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal/Fortran') {
 | |
|                 my $lang_F_or_P_or_P = "";
 | |
|                 php_pascal_or_fortran($full_file ,
 | |
|                                       $rh_Err    ,
 | |
|                                       $raa_errors,
 | |
|                                      \$lang_F_or_P_or_P);
 | |
|                 if ($lang_F_or_P_or_P) {
 | |
|                     return $lang_F_or_P_or_P;
 | |
|                 } else { # an error happened in php_pascal_or_fortran()
 | |
|                     $rh_ignored->{$full_file} =
 | |
|                         'failure in php_pascal_or_fortran($full_file)';
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
 | |
|                 my $lang_Pasc_or_Pup = "";
 | |
|                 pascal_or_puppet(     $full_file ,
 | |
|                                       $rh_Err    ,
 | |
|                                       $raa_errors,
 | |
|                                      \$lang_Pasc_or_Pup);
 | |
|                 if ($lang_Pasc_or_Pup) {
 | |
|                     return $lang_Pasc_or_Pup;
 | |
|                 } else { # an error happened in pascal_or_puppet()
 | |
|                     $rh_ignored->{$full_file} =
 | |
|                         'failure in pascal_or_puppet()';
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
 | |
|                 return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
 | |
|                 return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
 | |
|                 return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Raku/Prolog') {
 | |
|                 return Raku_or_Prolog($full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq
 | |
|                      'IDL/Qt Project/Prolog/ProGuard') {
 | |
|                 return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
 | |
|                 # is it D or an init.d shell script?
 | |
|                 my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
 | |
|                 if ($a_script) {
 | |
|                     # could be dtrace, sh, bash or anything one would
 | |
|                     # write an init.d script in
 | |
|                     if (defined $Language_by_Script{$a_script}) {
 | |
|                         return $Language_by_Script{$a_script};
 | |
|                     } else {
 | |
|                         $rh_ignored->{$full_file} =
 | |
|                             "Unrecognized script language, '$a_script'";
 | |
|                     }
 | |
|                 } else {
 | |
|                     return 'D';
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Fortran 77/Forth') {
 | |
|                 return Forth_or_Fortran($full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'F#/Forth') {
 | |
|                 return Forth_or_Fsharp( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Verilog-SystemVerilog/Coq') {
 | |
|                 return Verilog_or_Coq( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Smarty') {
 | |
|                 if ($extension ne "tpl") {
 | |
|                     # unambiguous -- if ends with .smarty, is Smarty
 | |
|                     return $Language_by_Extension{$extension};
 | |
|                 }
 | |
|                 # Smarty extension .tpl is generic; make sure the
 | |
|                 # file at least roughly resembles PHP.  Alternatively,
 | |
|                 # if the user forces the issue, do the count.
 | |
|                 my $force_smarty = 0;
 | |
|                 foreach (@opt_force_lang) {
 | |
|                     if (lc($_) eq "smarty,tpl") {
 | |
|                         $force_smarty = 1;
 | |
|                         last;
 | |
|                     }
 | |
|                 }
 | |
|                 if (really_is_smarty($full_file) or $force_smarty) {
 | |
|                     return 'Smarty';
 | |
|                 } else {
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'TypeScript/Qt Linguist') {
 | |
|                 return TypeScript_or_QtLinguist( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Qt/Glade') {
 | |
|                 return Qt_or_Glade( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'C#/Smalltalk') {
 | |
|                 my $L = Csharp_or_Smalltalk( $full_file, $rh_Err, $raa_errors);
 | |
|                 if ($L eq 'C#') {
 | |
|                     my $lines = first_line($full_file, 2, $rh_Err, $raa_errors);
 | |
|                     $lines =~ s/\n//mg;
 | |
|                     if ($lines =~ m[//\s+<auto-generated>]) {
 | |
|                         $L = "C# Generated";
 | |
|                         if ($opt_no_autogen) {
 | |
|                             $rh_ignored->{$full_file} =
 | |
|                                 '--no-autogen ignores this auto-generated C# file';
 | |
|                             $L = "(unknown)"; # forces it to be ignored
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|                 return $L;
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Scheme/SaltStack') {
 | |
|                 return Scheme_or_SaltStack( $full_file, $rh_Err, $raa_errors);
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Visual Basic/TeX/Apex Class') {
 | |
|                 my $lang_VB_T_A = "";
 | |
|                 Visual_Basic_or_TeX_or_Apex($full_file ,
 | |
|                                             $rh_Err    ,
 | |
|                                             $raa_errors,
 | |
|                                            \$lang_VB_T_A);
 | |
|                 if ($lang_VB_T_A) {
 | |
|                     return $lang_VB_T_A;
 | |
|                 } else { # an error happened in Visual_Basic_or_TeX_or_Apex
 | |
|                     $rh_ignored->{$full_file} =
 | |
|                         'failure in Visual_Basic_or_TeX_or_Apex()';
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } elsif ($Language_by_Extension{$extension} eq 'Brainfuck') {
 | |
|                 if (really_is_bf($full_file)) {
 | |
|                     return $Language_by_Extension{$extension};
 | |
|                 } else {
 | |
|                     return $language; # (unknown)
 | |
|                 }
 | |
|             } else {
 | |
|                 return $Language_by_Extension{$extension};
 | |
|             }
 | |
|         } else { # has an unmapped file extension
 | |
|             $look_at_first_line = 1;
 | |
|         }
 | |
|       }
 | |
|       # if all else fails look at the prefix instead of extension
 | |
|       ( my $stem = $file ) =~ s/^(.*?)\.\S+$/$1/;
 | |
|       if ($stem and defined($Language_by_Prefix{$stem})) {
 | |
|           return $Language_by_Prefix{$stem}
 | |
|       }
 | |
|     } elsif (defined $Language_by_File{lc $file}) {
 | |
|         return $Language_by_File{lc $file};
 | |
|     } elsif ($opt_lang_no_ext and
 | |
|              defined $Filters_by_Language{$opt_lang_no_ext}) {
 | |
|         return $opt_lang_no_ext;
 | |
|     } else {  # no file extension
 | |
|         $look_at_first_line = 1;
 | |
|     }
 | |
| 
 | |
|     if ($look_at_first_line) {
 | |
|         # maybe it is a shell/Perl/Python/Ruby/etc script that
 | |
|         # starts with pound bang:
 | |
|         #   #!/usr/bin/perl
 | |
|         #   #!/usr/bin/env perl
 | |
|         my ($script_language, $L) = peek_at_first_line($full_file ,
 | |
|                                                        $rh_Err    ,
 | |
|                                                        $raa_errors);
 | |
|         if (!$script_language) {
 | |
|             $rh_ignored->{$full_file} = "language unknown (#2)";
 | |
|             # returns (unknown)
 | |
|         }
 | |
|         if (defined $Language_by_Script{$script_language}) {
 | |
|             if (defined $Filters_by_Language{
 | |
|                             $Language_by_Script{$script_language}}) {
 | |
|                 $language = $Language_by_Script{$script_language};
 | |
|             } else {
 | |
|                 $rh_ignored->{$full_file} =
 | |
|                     "undefined:  Filters_by_Language{" .
 | |
|                     $Language_by_Script{$script_language} .
 | |
|                     "} for scripting language $script_language";
 | |
|                 # returns (unknown)
 | |
|             }
 | |
|         } else {
 | |
|             # #456:  XML files can have a variety of domain-specific file
 | |
|             #        extensions.  If the extension is unrecognized, examine
 | |
|             #        the first line of the file to see if it is XML
 | |
|             if ($L =~ /<\?xml\s/) {
 | |
|                 $language = "XML";
 | |
|                 delete $rh_ignored->{$full_file};
 | |
|             } else {
 | |
|                 $rh_ignored->{$full_file} = "language unknown (#3)";
 | |
|             }
 | |
|             # returns (unknown)
 | |
|         }
 | |
|     }
 | |
|     print "<- classify_file($full_file)=$language\n" if $opt_v > 2;
 | |
|     return $language;
 | |
| } # 1}}}
 | |
| sub first_line {                             # {{{1
 | |
|     # return the first $n_lines of text in the file as one string
 | |
|     my ($file        , # in
 | |
|         $n_lines     , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
|     my $line = "";
 | |
|     print "-> first_line($file, $n_lines)\n" if $opt_v > 2;
 | |
|     if (!can_read($file)) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $line;
 | |
|     }
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         print "<- first_line($file, $n_lines)\n" if $opt_v > 2;
 | |
|         return $line;
 | |
|     }
 | |
|     # issue 644: Unicode files can have non-zero $n_lines
 | |
|     # but empty <$IN> contents
 | |
|     for (my $i = 0; $i < $n_lines; $i++) {
 | |
|         my $L = <$IN>;
 | |
|         last unless defined $L;
 | |
|         chomp($line .= $L);
 | |
|     }
 | |
|     $IN->close;
 | |
|     print "<- first_line($file, $n_lines, '$line')\n" if $opt_v > 2;
 | |
|     return $line;
 | |
| } # 1}}}
 | |
| sub peek_at_first_line {                     # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> peek_at_first_line($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $script_language = "";
 | |
|     my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
 | |
| 
 | |
|     if (defined $first_line) {
 | |
| #print "peek_at_first_line of [$file] first_line=[$first_line]\n";
 | |
|         if ($first_line =~ /^#\!\s*(\S.*?)$/) {
 | |
| #print "peek_at_first_line 1=[$1]\n";
 | |
|             my @pound_bang = split(' ', $1);
 | |
| #print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
 | |
|             if (basename($pound_bang[0]) eq "env" and
 | |
|                 scalar @pound_bang > 1) {
 | |
|                 $script_language = $pound_bang[1];
 | |
| #print "peek_at_first_line pound_bang A $pound_bang[1]\n";
 | |
|             } else {
 | |
|                 $script_language = basename $pound_bang[0];
 | |
| #print "peek_at_first_line pound_bang B $script_language\n";
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     print "<- peek_at_first_line($file)\n" if $opt_v > 2;
 | |
|     return ($script_language, $first_line);
 | |
| } # 1}}}
 | |
| sub different_files {                        # {{{1
 | |
|     # See which of the given files are unique by computing each file's MD5
 | |
|     # sum.  Return the subset of files which are unique.
 | |
|     my ($ra_files    , # in
 | |
|         $rh_Err      , # in
 | |
|         $raa_errors  , # out
 | |
|         $rh_ignored  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
 | |
|     my %file_hash = ();  # file_hash{md5 hash} = [ file1, file2, ... ]
 | |
|     foreach my $F (@{$ra_files}) {
 | |
|         next if is_dir($F);  # needed for Windows
 | |
|         my $IN = open_file('<', $F, 1);
 | |
|         if (!defined $IN) {
 | |
|             push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
 | |
|             $rh_ignored->{$F} = 'cannot read';
 | |
|         } else {
 | |
|             if ($HAVE_Digest_MD5) {
 | |
|                 binmode $IN;
 | |
|                 my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
 | |
| #print "$F, $MD5\n";
 | |
|                 push @{$file_hash{$MD5}}, $F;
 | |
|             } else {
 | |
|                 # all files treated unique
 | |
|                 push @{$file_hash{$F}}, $F;
 | |
|             }
 | |
|             $IN->close;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Loop over file sets having identical MD5 sums.  Within
 | |
|     # each set, pick the file that most resembles known source
 | |
|     # code.
 | |
|     my @unique = ();
 | |
|     for my $md5 (sort keys %file_hash) {
 | |
|         my $i_best = 0;
 | |
|         for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
 | |
|             my $F = $file_hash{$md5}[$i];
 | |
|             my (@nul_a, %nul_h);
 | |
|             my $language = classify_file($F, $rh_Err,
 | |
|                                         # don't save these errors; pointless
 | |
|                                         \@nul_a, \%nul_h);
 | |
|             $i_best = $i if $language ne "(unknown)";
 | |
|         }
 | |
|         # keep the best one found and identify the rest as ignored
 | |
|         for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
 | |
|             if ($i == $i_best) {
 | |
|                 push @unique, $file_hash{$md5}[$i_best];
 | |
|             } else {
 | |
|                 $rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
 | |
|                     $file_hash{$md5}[$i_best];
 | |
|             }
 | |
|         }
 | |
| 
 | |
|     }
 | |
|     print "<- different_files(@unique)\n" if $opt_v > 2;
 | |
|     return @unique;
 | |
| } # 1}}}
 | |
| sub call_counter {                           # {{{1
 | |
|     my ($file     , # in
 | |
|         $language , # in
 | |
|         $ra_Errors, # out
 | |
|        ) = @_;
 | |
| 
 | |
|     # Logic:  pass the file through the following filters:
 | |
|     #         1. remove leading lines (if --skip-leading)
 | |
|     #         2. remove blank lines
 | |
|     #         3. remove comments using each filter defined for this language
 | |
|     #            (example:  SQL has two, remove_starts_with(--) and
 | |
|     #             remove_c_comments() )
 | |
|     #         4. compute comment lines as
 | |
|     #               total lines - blank lines - lines left over after all
 | |
|     #                   comment filters have been applied
 | |
| 
 | |
|     print "-> call_counter($file, $language)\n" if $opt_v > 2;
 | |
| #print "call_counter:  ", Dumper(@routines), "\n";
 | |
| 
 | |
|     my @lines = ();
 | |
|     my $ascii = "";
 | |
|     if (is_binary($file) and $opt_unicode) {
 | |
|         # was binary so must be unicode
 | |
| 
 | |
|         $/ = undef;
 | |
|         my $IN = open_file('<', $file, 1);
 | |
|         my $bin_text = <$IN>;
 | |
|         $IN->close;
 | |
|         $/ = "\n";
 | |
| 
 | |
|         $ascii = unicode_to_ascii( $bin_text );
 | |
|         @lines = split("\n", $ascii );
 | |
|         foreach (@lines) { $_ = "$_\n"; }
 | |
| 
 | |
|     } else {
 | |
|         # regular text file
 | |
|         @lines = read_file($file);
 | |
|         $ascii = join('', @lines);
 | |
|     }
 | |
| 
 | |
|     # implement --perl-ignore-data here
 | |
| 
 | |
|     if ($opt_skip_leading) {
 | |
|         my $strip = 1;
 | |
|         my ($N, @exts) = split(/,/, $opt_skip_leading);
 | |
|         if (@exts) {
 | |
|             # only apply if this file's extension is listed
 | |
|             my $this_file_ext = file_extension($file);
 | |
|             $strip = grep(/^${this_file_ext}$/, @exts);
 | |
|         }
 | |
|         @lines = remove_first_n($N, \@lines) if $strip;
 | |
|     }
 | |
| 
 | |
|     my @original_lines = @lines;
 | |
|     my $total_lines    = scalar @lines;
 | |
| 
 | |
|     print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
 | |
|     @lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
 | |
|     my $blank_lines = $total_lines - scalar @lines;
 | |
|     print "   call_counter: total_lines=$total_lines  blank_lines=",
 | |
|         $blank_lines, "\n" if $opt_v > 2;
 | |
|     print_lines($file, "Blank lines removed:", \@lines)
 | |
|         if $opt_print_filter_stages;
 | |
| 
 | |
|     @lines = rm_comments(\@lines, $language, $file,
 | |
|                                \%EOL_Continuation_re, $ra_Errors);
 | |
| 
 | |
|     my $comment_lines = $total_lines - $blank_lines - scalar  @lines;
 | |
|     if ($opt_strip_comments) {
 | |
|         my $stripped_file = "";
 | |
|         if ($opt_original_dir) {
 | |
|             $stripped_file =          $file . ".$opt_strip_comments";
 | |
|         } else {
 | |
|             $stripped_file = basename $file . ".$opt_strip_comments";
 | |
|         }
 | |
|         write_file($stripped_file, {}, @lines);
 | |
|     }
 | |
|     if ($opt_strip_code) {
 | |
|         my $stripped_file = "";
 | |
|         if ($opt_original_dir) {
 | |
|             $stripped_file =          $file . ".$opt_strip_code";
 | |
|         } else {
 | |
|             $stripped_file = basename $file . ".$opt_strip_code";
 | |
|         }
 | |
|         write_file($stripped_file, {}, rm_code(\@original_lines, \@lines));
 | |
|     }
 | |
|     if ($opt_html and !$opt_diff) {
 | |
|         chomp(@original_lines);  # includes blank lines, comments
 | |
|         chomp(@lines);           # no blank lines, no comments
 | |
| 
 | |
|         my (@diff_L, @diff_R, %count);
 | |
| 
 | |
|         # remove blank lines to get better quality diffs; count
 | |
|         # blank lines separately
 | |
|         my @original_lines_minus_white = ();
 | |
|         # however must keep track of how many blank lines were removed and
 | |
|         # where they were removed so that the HTML display can include it
 | |
|         my %blank_line  = ();
 | |
|         my $insert_line = 0;
 | |
|         foreach (@original_lines) {
 | |
|             if (/^\s*$/) {
 | |
|                ++$count{blank}{same};
 | |
|                ++$blank_line{ $insert_line };
 | |
|             } else {
 | |
|                 ++$insert_line;
 | |
|                 push @original_lines_minus_white, $_;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         array_diff( $file                       ,   # in
 | |
|                    \@original_lines_minus_white ,   # in
 | |
|                    \@lines                      ,   # in
 | |
|                    "comment"                    ,   # in
 | |
|                    \@diff_L, \@diff_R,          ,   # out
 | |
|                     $ra_Errors);                    # in/out
 | |
|         write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
 | |
| #print Dumper("count", \%count);
 | |
|     }
 | |
| 
 | |
|     print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
 | |
|         if $opt_v > 2;
 | |
|     return ($total_lines, $blank_lines, $comment_lines);
 | |
| } # 1}}}
 | |
| sub windows_glob {                           # {{{1
 | |
|     # Windows doesn't expand wildcards.  Use code from Sean M. Burke's
 | |
|     # Win32::Autoglob module to do this.
 | |
|     return map {;
 | |
|         ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
 | |
|           } @_;
 | |
| } # 1}}}
 | |
| sub write_file {                             # {{{1
 | |
|     my ($file       , # in
 | |
|         $rh_options , # in
 | |
|         @lines      , # in
 | |
|        ) = @_;
 | |
|     # If $file is a conventional scalar, it is the name of the file to write to.
 | |
|     # if $file is a reference to a scalar, rather than writing @lines to a file,
 | |
|     # write @lines to this scalar as a single string.
 | |
| 
 | |
|     my $local_formatting = 0;
 | |
|     foreach my $opt (sort keys %{$rh_options}) {
 | |
| #       print "write_file option $opt = $rh_options->{$opt}\n";
 | |
|         $local_formatting = 1;
 | |
|     }
 | |
|     my $write_to_file = ref($file) eq "" ? 1 : 0;
 | |
| 
 | |
| #print "write_file 1 [$file]\n";
 | |
|     # Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
 | |
|     if ($write_to_file) {
 | |
|         my $preglob_filename = $file;
 | |
|     #print "write_file 2 [$preglob_filename]\n";
 | |
|         if ($ON_WINDOWS) {
 | |
|             $file = (windows_glob($file))[0];
 | |
|         } else {
 | |
|             $file = File::Glob::bsd_glob($file);
 | |
|         }
 | |
|     #print "write_file 3 [$file]\n";
 | |
|         $file = $preglob_filename unless $file;
 | |
|     #print "write_file 4 [$file]\n";
 | |
|     }
 | |
| 
 | |
|     if ($write_to_file) {
 | |
|         print "-> write_file($file)\n" if $opt_v > 2;
 | |
|     } else {
 | |
|         print "-> write_file() -- writing to string variable\n" if $opt_v > 2;
 | |
|     }
 | |
| 
 | |
|     my @prt_lines = ();
 | |
| 
 | |
|     my $n_col = undef;
 | |
|     if ($local_formatting) {
 | |
|         $n_col = scalar @{$rh_options->{'columns'}};
 | |
|         if ($opt_xml) {
 | |
|             push @prt_lines, '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
 | |
|             push @prt_lines, "<all_$rh_options->{'file_type'}>\n";
 | |
|         } elsif ($opt_yaml) {
 | |
|             push @prt_lines, "---\n";
 | |
|         } elsif ($opt_md) {
 | |
|             push @prt_lines, join("|", @{$rh_options->{'columns'}})  . "\n";
 | |
|             push @prt_lines, join("|", map( ":------", 1 .. $n_col)) . "\n";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     chomp(@lines);
 | |
| 
 | |
|     if ($local_formatting) {
 | |
|         my @json_lines = ();
 | |
|         foreach my $L (@lines) {
 | |
|             my @entries;
 | |
|             if ($rh_options->{'separator'}) {
 | |
|                 @entries = split($rh_options->{'separator'}, $L, $n_col);
 | |
|             } else {
 | |
|                 @entries = ( $L );
 | |
|             }
 | |
|             if ($opt_xml) {
 | |
|                 push @prt_lines, "  <$rh_options->{'file_type'} ";
 | |
|                 for (my $i = 0; $i < $n_col; $i++) {
 | |
|                     push @prt_lines, sprintf("%s=\"%s\" ", $rh_options->{'columns'}[$i], $entries[$i]);
 | |
|                 }
 | |
|                 push @prt_lines, "/>\n";
 | |
|             } elsif ($opt_yaml or $opt_json) {
 | |
|                 my @pairs = ();
 | |
|                 for (my $i = 0; $i < $n_col; $i++) {
 | |
|                     push @pairs,
 | |
|                         sprintf "\"%s\":\"%s\"", $rh_options->{'columns'}[$i], $entries[$i];
 | |
|                 }
 | |
|                 if ($opt_json) {
 | |
|                     # JSON can't literal '\x' in filenames, #575
 | |
|                     $pairs[0] =~ s/\\x//g;
 | |
|                     push @json_lines, join(", ", @pairs );
 | |
|                 } else {
 | |
|                     push @prt_lines, "- {", join(", ", @pairs) . "}\n";
 | |
|                 }
 | |
|             } elsif ($opt_csv) {
 | |
|                 push @prt_lines, join(",", @entries) . "\n";
 | |
|             } elsif ($opt_md) {
 | |
|                 push @prt_lines, join("|", @entries) . "\n";
 | |
|             }
 | |
|         }
 | |
|         if ($opt_json) {
 | |
|             push @prt_lines, "[{" . join("},\n {", @json_lines) . "}]\n";
 | |
|         }
 | |
|         if (!$opt_json and !$opt_yaml and !$opt_xml and !$opt_csv) {
 | |
|             push @prt_lines, join("\n", @lines) . "\n";
 | |
|         }
 | |
|     } else {
 | |
|         push @prt_lines, join("\n", @lines) . "\n";
 | |
|     }
 | |
| 
 | |
|     if ($local_formatting and $opt_xml) {
 | |
|         push @prt_lines, "</all_$rh_options->{'file_type'}>\n";
 | |
|     }
 | |
| 
 | |
|     # Create the destination directory if it doesn't already exist.
 | |
|     my $abs_file_path = File::Spec->rel2abs( $file );
 | |
|     my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
 | |
|     mkpath($volume . $directories, 1, 0777);
 | |
| 
 | |
|     my $OUT = undef;
 | |
|     unlink_file($file);
 | |
|     if ($opt_file_encoding) {
 | |
|         $OUT = open_file(">:encoding($opt_file_encoding)", $file, 0);
 | |
|     } else {
 | |
|         $OUT = open_file('>', $file, 1);
 | |
|     }
 | |
|     if (!defined $OUT) {
 | |
|         warn "Unable to write to $file\n";
 | |
|         print "<- write_file\n" if $opt_v > 2;
 | |
|         return;
 | |
|     }
 | |
|     print $OUT @prt_lines;
 | |
|     $OUT->close;
 | |
| 
 | |
|     if (can_read($file)) {
 | |
|         print "Wrote $file" unless $opt_quiet or $opt_fmt > 0;
 | |
|         print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
 | |
|         print "\n" unless $opt_quiet;
 | |
|     }
 | |
| 
 | |
|     print "<- write_file\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub file_pairs_from_file {                   # {{{1
 | |
|     my ($file             , # in
 | |
|         $ra_added         , # out
 | |
|         $ra_removed       , # out
 | |
|         $ra_compare_list  , # out
 | |
|        ) = @_;
 | |
|     #
 | |
|     # Example valid input format for $file
 | |
|     # 1)
 | |
|     #   A/d1/hello.f90 | B/d1/hello.f90
 | |
|     #   A/hello.C | B/hello.C
 | |
|     #   A/d2/hi.py | B/d2/hi.py
 | |
|     #
 | |
|     # 2)
 | |
|     # Files added: 1
 | |
|     #   + B/extra_file.pl ; Perl
 | |
|     #
 | |
|     # Files removed: 1
 | |
|     #   - A/d2/hello.java ; Java
 | |
|     #
 | |
|     # File pairs compared: 3
 | |
|     #   != A/d1/hello.f90 | B/d1/hello.f90 ; Fortran 90
 | |
|     #   != A/hello.C | B/hello.C ; C++
 | |
|     #   == A/d2/hi.py | B/d2/hi.py ; Python
 | |
| 
 | |
|     print "-> file_pairs_from_file($file)\n" if $opt_v and $opt_v > 2;
 | |
|     @{$ra_compare_list} = ();
 | |
|     my @lines = read_file($file);
 | |
|     my $mode = "compare";
 | |
|     foreach my $L (@lines) {
 | |
|         next if $L =~ /^\s*$/ or $L =~ /^\s*#/;
 | |
|         chomp($L);
 | |
|         if      ($L =~ /^Files\s+(added|removed):/) {
 | |
|             $mode = $1;
 | |
|         } elsif ($L =~ /^File\s+pairs\s+compared:/) {
 | |
|             $mode = "compare";
 | |
|         } elsif ($mode eq "added" or $mode eq "removed") {
 | |
|             $L =~ m/^\s*[+-]\s+(.*?)\s+;/;
 | |
|             my $F = $1;
 | |
|             if (!defined $1) {
 | |
|                 warn "file_pairs_from_file($file) parse failure\n",
 | |
|                      "in $mode mode for '$L', ignoring\n";
 | |
|                 next;
 | |
|             }
 | |
|             if ($mode eq "added") {
 | |
|                 push @{$ra_added}  , $F;
 | |
|             } else {
 | |
|                 push @{$ra_removed}, $F;
 | |
|             }
 | |
|         } else {
 | |
|             $L =~ m/^\s*([!=]=\s*)?(.*?)\s*\|\s*(.*?)\s*(;.*?)?$/;
 | |
|             if (!defined $2 or !defined $3) {
 | |
|                 warn "file_pairs_from_file($file) parse failure\n",
 | |
|                      "in compare mode for '$L', ignoring\n";
 | |
|                 next;
 | |
|             }
 | |
|             push @{$ra_compare_list}, ( [$2, $3] );
 | |
|         }
 | |
|     }
 | |
|     print "<- file_pairs_from_file\n" if $opt_v and $opt_v > 2;
 | |
| }
 | |
| sub read_file  {                             # {{{1
 | |
|     my ($file, ) = @_;
 | |
|     print "-> read_file($file)\n" if $opt_v and $opt_v > 2;
 | |
|     my %BoM = (
 | |
|         "fe ff"           => 2 ,
 | |
|         "ff fe"           => 2 ,
 | |
|         "ef bb bf"        => 3 ,
 | |
|         "f7 64 4c"        => 3 ,
 | |
|         "0e fe ff"        => 3 ,
 | |
|         "fb ee 28"        => 3 ,
 | |
|         "00 00 fe ff"     => 4 ,
 | |
|         "ff fe 00 00"     => 4 ,
 | |
|         "2b 2f 76 38"     => 4 ,
 | |
|         "2b 2f 76 39"     => 4 ,
 | |
|         "2b 2f 76 2b"     => 4 ,
 | |
|         "2b 2f 76 2f"     => 4 ,
 | |
|         "dd 73 66 73"     => 4 ,
 | |
|         "84 31 95 33"     => 4 ,
 | |
|         "2b 2f 76 38 2d"  => 5 ,
 | |
|         );
 | |
| 
 | |
|     my @lines = ();
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (defined $IN) {
 | |
|         @lines = <$IN>;
 | |
|         $IN->close;
 | |
|         if ($lines[$#lines]) {  # test necessary for zero content files
 | |
|                                 # (superfluous?)
 | |
|             # Some files don't end with a new line.  Force this:
 | |
|             $lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
 | |
|         }
 | |
|     } else {
 | |
|         warn "Unable to read $file\n";
 | |
|     }
 | |
| 
 | |
|     # Are first few characters of the file Unicode Byte Order
 | |
|     # Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)?
 | |
|     # If yes, remove them.
 | |
|     if (@lines) {
 | |
|         my @chrs   = split('', $lines[0]);
 | |
|         my $n_chrs = scalar @chrs;
 | |
|         my ($n2, $n3, $n4, $n5) = ('', '', '', '');
 | |
|         $n2 = sprintf("%x %x", map  ord, @chrs[0,1]) if $n_chrs >= 2;
 | |
|         $n3 = sprintf("%s %x", $n2, ord  $chrs[2])   if $n_chrs >= 3;
 | |
|         $n4 = sprintf("%s %x", $n3, ord  $chrs[3])   if $n_chrs >= 4;
 | |
|         $n5 = sprintf("%s %x", $n4, ord  $chrs[4])   if $n_chrs >= 5;
 | |
|         if      (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
 | |
|         } elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
 | |
|         } elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
 | |
|         } elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Trim DOS line endings.  This allows Windows files
 | |
|     # to be diff'ed with Unix files without line endings
 | |
|     # causing every line to differ.
 | |
|     foreach (@lines) { s/\cM$// }
 | |
| 
 | |
|     print "<- read_file\n" if $opt_v and $opt_v > 2;
 | |
|     return @lines;
 | |
| } # 1}}}
 | |
| sub rm_blanks {                              # {{{1
 | |
|     my ($ra_in    ,
 | |
|         $language ,
 | |
|         $rh_EOL_continuation_re) = @_;
 | |
|     print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
 | |
| #print "rm_blanks: language = [$language]\n";
 | |
|     my @out = ();
 | |
|     if ($language eq "COBOL") {
 | |
|         @out = remove_cobol_blanks($ra_in);
 | |
|     } else {
 | |
|         # removes blank lines
 | |
|         if (defined $rh_EOL_continuation_re->{$language}) {
 | |
|             @out = remove_matches_2re($ra_in, blank_regex($language),
 | |
|                                       $rh_EOL_continuation_re->{$language});
 | |
|         } else {
 | |
|             @out = remove_matches($ra_in, blank_regex($language));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- rm_blanks(language=$language, n_remain= ",
 | |
|         scalar(@out), "\n" if $opt_v > 2;
 | |
|     return @out;
 | |
| } # 1}}}
 | |
| sub blank_regex {                            # {{{1
 | |
|     my ($language) = @_;
 | |
| 
 | |
|     print "-> blank_regex(language=$language)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $blank_regex = '^\s*$';
 | |
|     if ($language eq "X++") {
 | |
|         $blank_regex = '^\s*#?\s*$';
 | |
|     }
 | |
| 
 | |
|     print "<- blank_regex(language=$language) = \"", $blank_regex, "\"\n" if $opt_v > 2;
 | |
|     return $blank_regex;
 | |
| } # 1}}}
 | |
| sub rm_comments {                            # {{{1
 | |
|     my ($ra_lines , # in, must be free of blank lines
 | |
|         $language , # in
 | |
|         $file     , # in (some language counters, eg Haskell, need
 | |
|                     #     access to the original file)
 | |
|         $rh_EOL_continuation_re , # in
 | |
|         $raa_Errors , # out
 | |
|        ) = @_;
 | |
|     print "-> rm_comments(file=$file)\n" if $opt_v > 2;
 | |
|     my @routines       = @{$Filters_by_Language{$language}};
 | |
|     my @lines          = @{$ra_lines};
 | |
|     my @original_lines = @{$ra_lines};
 | |
| 
 | |
|     if (!scalar @original_lines) {
 | |
|         return @lines;
 | |
|     }
 | |
| 
 | |
|     foreach my $call_string (@routines) {
 | |
|         my $subroutine = $call_string->[0];
 | |
|         next if $subroutine eq "rm_comments_in_strings" and !$opt_strip_str_comments;
 | |
|         if (! defined &{$subroutine}) {
 | |
|             warn "rm_comments undefined subroutine $subroutine for $file\n";
 | |
|             next;
 | |
|         }
 | |
|         print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
 | |
|         my @args  = @{$call_string};
 | |
|         shift @args; # drop the subroutine name
 | |
|         if (@args and $args[0] eq '>filename<') {
 | |
|             shift   @args;
 | |
|             unshift @args, $file;
 | |
|         }
 | |
| 
 | |
|         # Unusual inputs, namely /* within strings without
 | |
|         # a corresponding */ can cause huge delays so put a timer on this.
 | |
|         my $max_duration_sec = scalar(@lines)/1000.0; # est lines per second
 | |
|            $max_duration_sec = 1.0 if $max_duration_sec < 1;
 | |
|         if (defined $opt_timeout) {
 | |
|             $max_duration_sec = $opt_timeout if $opt_timeout > 0;
 | |
|         }
 | |
| #my $T_start = Time::HiRes::time();
 | |
|         eval {
 | |
|             local $SIG{ALRM} = sub { die "alarm\n" };
 | |
|             alarm $max_duration_sec;
 | |
|             no strict 'refs';
 | |
|             @lines = &{$subroutine}(\@lines, @args);   # apply filter...
 | |
|             alarm 0;
 | |
|         };
 | |
|         if ($@) {
 | |
|             # timed out
 | |
|             die unless $@ eq "alarm\n";
 | |
|             push @{$raa_Errors},
 | |
|                 [ $Error_Codes{'Line count, exceeded timeout'}, $file ];
 | |
|             @lines = ();
 | |
|             if ($opt_v) {
 | |
|                 warn "rm_comments($subroutine): exceeded timeout for $file--ignoring\n";
 | |
|             }
 | |
|             next;
 | |
|         }
 | |
| #print "end time = ",Time::HiRes::time() - $T_start;
 | |
| 
 | |
|         print "   rm_comments after $subroutine line count=",
 | |
|             scalar(@lines), "\n" if $opt_v > 2;
 | |
| 
 | |
| #print "lines after=\n";
 | |
| #print Dumper(\@lines);
 | |
| 
 | |
|         print_lines($file, "After $subroutine(@args)", \@lines)
 | |
|             if $opt_print_filter_stages;
 | |
|         # then remove blank lines which are created by comment removal
 | |
|         if (defined $rh_EOL_continuation_re->{$language}) {
 | |
|             @lines = remove_matches_2re(\@lines, '^\s*$',
 | |
|                                         $rh_EOL_continuation_re->{$language});
 | |
|         } else {
 | |
|             @lines = remove_matches(\@lines, '^\s*$');
 | |
|         }
 | |
| 
 | |
|         print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines)
 | |
|             if $opt_print_filter_stages;
 | |
|     }
 | |
| 
 | |
|     foreach (@lines) { chomp }   # make sure no spurious newlines were added
 | |
| 
 | |
|     # Exception for scripting languages:  treat the first #! line as code.
 | |
|     # Will need to add it back in if it was removed earlier.
 | |
|     chomp( $original_lines[0] );
 | |
|     if (defined $Script_Language{$language} and
 | |
|         $original_lines[0] =~ /^#!/ and
 | |
|         (!scalar(@lines) or ($lines[0] ne $original_lines[0]))) {
 | |
|         unshift @lines, $original_lines[0];  # add the first line back
 | |
|     }
 | |
| 
 | |
|     print "<- rm_comments\n" if $opt_v > 2;
 | |
|     return @lines;
 | |
| } # 1}}}
 | |
| sub rm_code {                                # {{{1
 | |
|     # Return lines containing only comments.
 | |
|     my ($ra_lines_w_comments , # in
 | |
|         $ra_lines_no_comments, # in
 | |
|        )  = @_;
 | |
| 
 | |
|     my @w_comments_no_blanks  = grep { ! /^\s*$/ } @{$ra_lines_w_comments} ;
 | |
|     my @no_comments_no_blanks = grep { ! /^\s*$/ } @{$ra_lines_no_comments};
 | |
|     chomp( @w_comments_no_blanks  );
 | |
|     chomp( @no_comments_no_blanks );
 | |
| 
 | |
|     my @sdiffs = sdiff( \@w_comments_no_blanks, \@no_comments_no_blanks, );
 | |
|     my @comments = ();
 | |
|     foreach my $entry (@sdiffs) {
 | |
|         my ($out_1, $out_2) = ('', '');
 | |
|         next if $entry->[0] eq 'u';
 | |
|         if ($entry->[0] eq '-') {
 | |
|             $out_1 = $entry->[1];
 | |
|         }
 | |
|         next if $out_1 =~ /^\s*$/;
 | |
|         push @comments, $out_1;
 | |
|     }
 | |
|     return @comments;
 | |
| } # 1}}}
 | |
| sub remove_first_n {                         # {{{1
 | |
|     my ($n, $ra_lines, ) = @_;
 | |
|     print "-> remove_first_n\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     if (scalar @{$ra_lines} > $n) {
 | |
|         for (my $i = $n; $i < scalar @{$ra_lines}; $i++) {
 | |
|             push @save_lines, $ra_lines->[$i];
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- remove_first_n\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_f77_comments {                    # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     print "-> remove_f77_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
|         next if m{^[*cC]};
 | |
|         next if m{^\s*!};
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_f77_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_f90_comments {                    # {{{1
 | |
|     # derived from SLOCCount
 | |
|     my ($ra_lines, ) = @_;
 | |
|     print "-> remove_f90_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
|         # a comment is              m/^\s*!/
 | |
|         # an empty line is          m/^\s*$/
 | |
|         # a HPF statement is        m/^\s*!hpf\$/i
 | |
|         # an Open MP statement is   m/^\s*!omp\$/i
 | |
|         if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
 | |
|             push @save_lines, $_;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- remove_f90_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub reduce_to_rmd_code_blocks {              #{{{1
 | |
|     my ($ra_lines) = @_; #in
 | |
|     print "-> reduce_to_rmd_code_blocks()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_code_block = 0;
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if ( m/^```\{\s*[[:alpha:]]/ ) {
 | |
|             $in_code_block = 1;
 | |
|             next;
 | |
|         }
 | |
|         if ( m/^```\s*$/ ) {
 | |
|             $in_code_block = 0;
 | |
|         }
 | |
|         next if (!$in_code_block);
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- reduce_to_rmd_code_blocks()\n" if $opt_v> 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_matches {                         # {{{1
 | |
|     my ($ra_lines, # in
 | |
|         $pattern , # in   Perl regular expression (case insensitive)
 | |
|        ) = @_;
 | |
|     print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
| #chomp;
 | |
| #print "remove_matches [$pattern] [$_]\n";
 | |
|         next if m{$pattern}i;
 | |
| #       s{$pattern}{}i;
 | |
| #       next unless /\S/; # at least one non space
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     print "<- remove_matches\n" if $opt_v > 2;
 | |
| #print "remove_matches returning\n   ", join("\n   ", @save_lines), "\n";
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_matches_2re {                     # {{{1
 | |
|     my ($ra_lines, # in
 | |
|         $pattern1, # in Perl regex 1 (case insensitive) to match
 | |
|         $pattern2, # in Perl regex 2 (case insensitive) to not match prev line
 | |
|        ) = @_;
 | |
|     print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
 | |
| #       chomp($ra_lines->[$i]);
 | |
| #print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
 | |
|         if ($i) {
 | |
| #print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
 | |
|             next if ($ra_lines->[$i]   =~ m{$pattern1}i) and
 | |
|                     ($ra_lines->[$i-1] !~ m{$pattern2}i);
 | |
|         } else {
 | |
|             # on first line
 | |
|             next if $ra_lines->[$i]   =~  m{$pattern1}i;
 | |
|         }
 | |
|         push @save_lines, $ra_lines->[$i];
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     print "<- remove_matches_2re\n" if $opt_v > 2;
 | |
| #print "remove_matches_2re returning\n   ", join("\n   ", @save_lines), "\n";
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_inline {                          # {{{1
 | |
|     my ($ra_lines, # in
 | |
|         $pattern , # in   Perl regular expression (case insensitive)
 | |
|        ) = @_;
 | |
|     print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     unless ($opt_inline) {
 | |
|         return @{$ra_lines};
 | |
|     }
 | |
|     my $nLines_affected = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| #chomp; print "remove_inline [$pattern] [$_]\n";
 | |
|         if (m{$pattern}i) {
 | |
|             ++$nLines_affected;
 | |
|             s{$pattern}{}i;
 | |
|         }
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_inline\n" if $opt_v > 2;
 | |
| #print "remove_inline returning\n   ", join("\n   ", @save_lines), "\n";
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_above {                           # {{{1
 | |
|     my ($ra_lines, $marker, ) = @_;
 | |
|     print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
 | |
| 
 | |
|     # Make two passes through the code:
 | |
|     # 1. check if the marker exists
 | |
|     # 2. remove anything above the marker if it exists,
 | |
|     #    do nothing if the marker does not exist
 | |
| 
 | |
|     # Pass 1
 | |
|     my $found_marker = 0;
 | |
|     for (my $line_number  = 1;
 | |
|             $line_number <= scalar @{$ra_lines};
 | |
|             $line_number++) {
 | |
|         if ($ra_lines->[$line_number-1] =~ m{$marker}) {
 | |
|             $found_marker = $line_number;
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # Pass 2 only if needed
 | |
|     my @save_lines = ();
 | |
|     if ($found_marker) {
 | |
|         my $n = 1;
 | |
|         foreach (@{$ra_lines}) {
 | |
|             push @save_lines, $_
 | |
|                 if $n >= $found_marker;
 | |
|             ++$n;
 | |
|         }
 | |
|     } else { # marker wasn't found; save all lines
 | |
|         foreach (@{$ra_lines}) {
 | |
|             push @save_lines, $_;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- remove_above\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_below {                           # {{{1
 | |
|     my ($ra_lines, $marker, ) = @_;
 | |
|     print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
|         last if m{$marker};
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_below\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_below_above {                     # {{{1
 | |
|     my ($ra_lines, $marker_below, $marker_above, ) = @_;
 | |
|     # delete lines delimited by start and end line markers such
 | |
|     # as Perl POD documentation
 | |
|     print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $between    = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if (!$between and m{$marker_below}) {
 | |
|             $between    = 1;
 | |
|             next;
 | |
|         }
 | |
|         if ($between and m{$marker_above}) {
 | |
|             $between    = 0;
 | |
|             next;
 | |
|         }
 | |
|         next if $between;
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_below_above\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_between {                         # {{{1
 | |
|     my ($ra_lines, $marker, ) = @_;
 | |
|     # $marker must contain one of the balanced pairs understood
 | |
|     # by Regexp::Common::balanced, namely
 | |
|     # '{}'  '()'  '[]'  or  '<>'
 | |
| 
 | |
|     print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     my %acceptable = ('{}'=>1,  '()'=>1,  '[]'=>1,  '<>'=>1, );
 | |
|     die "remove_between:  invalid delimiter '$marker'\n",
 | |
|         "the delimiter must be one of these four pairs:\n",
 | |
|         "{}  ()  []  <>\n" unless
 | |
|         $acceptable{$marker};
 | |
| 
 | |
|     Install_Regexp_Common() unless $HAVE_Rexexp_Common;
 | |
| 
 | |
|     my $all_lines = join("", @{$ra_lines});
 | |
| 
 | |
|     no strict 'vars';
 | |
|     # otherwise get:
 | |
|     #  Global symbol "%RE" requires explicit package name at cloc line xx.
 | |
|     if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
 | |
|         no warnings;
 | |
|         $all_lines =~ s/$1//g;
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", split(/\n/, $all_lines)), "]\n" if $opt_v > 4;
 | |
|     print "<- remove_between\n" if $opt_v > 2;
 | |
|     return split("\n", $all_lines);
 | |
| } # 1}}}
 | |
| sub rm_comments_in_strings {                 # {{{1
 | |
|     my ($ra_lines, $string_marker, $start_comment, $end_comment, $multiline_mode) = @_;
 | |
|     $multiline_mode = 0 if not defined $multiline_mode;
 | |
|     # Replace comments within strings with 'xx'.
 | |
| 
 | |
|     print "-> rm_comments_in_strings(string_marker=$string_marker, " .
 | |
|           "start_comment=$start_comment, end_comment=$end_comment)\n"
 | |
|         if $opt_v > 2;
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_ml_string = 0;
 | |
|     foreach my $line (@{$ra_lines}) {
 | |
|        #print "line=[$line]\n";
 | |
|         my $new_line = "";
 | |
| 
 | |
|         if ($line !~ /${string_marker}/) {
 | |
|             # short circuit; no strings on this line
 | |
|             if ( $in_ml_string ) {
 | |
|                 $line =~ s/\Q${start_comment}\E/xx/g;
 | |
|                 $line =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | |
|             }
 | |
|             push @save_lines, $line;
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         # replace backslashed string markers with 'Q'
 | |
|         $line =~ s/\\${string_marker}/Q/g;
 | |
| 
 | |
|         if ( $in_ml_string and $line =~ /^(.*?)(${string_marker})(.*)$/ ) {
 | |
|             # A multiline string ends on this line. Process the part
 | |
|             # until the end of the multiline string first.
 | |
|             my ($lastpart_ml_string, $firstpart_marker, $rest_of_line )  = ($1, $2, $3);
 | |
|             $lastpart_ml_string =~ s/\Q${start_comment}\E/xx/g;
 | |
|             $lastpart_ml_string =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | |
|             $new_line = $lastpart_ml_string . $firstpart_marker;
 | |
|             $line = $rest_of_line;
 | |
|             $in_ml_string = 0;
 | |
|         }
 | |
| 
 | |
|         my @tokens = split(/(${string_marker}.*?${string_marker})/, $line);
 | |
|         foreach my $t (@tokens) {
 | |
|            #printf "  t0 = [$t]\n";
 | |
|             if ($t =~ /${string_marker}.*${string_marker}$/) {
 | |
|                 # enclosed in quotes; process this token
 | |
|                 $t =~ s/\Q${start_comment}\E/xx/g;
 | |
|                 $t =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | |
|             }
 | |
|             elsif ( $multiline_mode and $t =~ /(${string_marker})/ ) {
 | |
|                 # Unclosed quote present in line. If multiline_mode is enabled,
 | |
|                 # consider it the start of a multiline string.
 | |
|                 my $firstpart_marker = $1;
 | |
|                 my @sub_token = split(/${string_marker}/, $t );
 | |
| 
 | |
|                 if ( scalar @sub_token == 1 ) {
 | |
|                     # The line ends with a string marker that starts
 | |
|                     # a multiline string.
 | |
|                     $t = $sub_token[0] . $firstpart_marker;
 | |
|                     $in_ml_string = 1;
 | |
|                 }
 | |
|                 elsif ( scalar @sub_token == 2 ) {
 | |
|                     # The line has some more content after the string
 | |
|                     # marker that starts a multiline string
 | |
|                     $t = $sub_token[0] . $firstpart_marker;
 | |
|                     $sub_token[1] =~ s/\Q${start_comment}\E/xx/g;
 | |
|                     $sub_token[1] =~ s/\Q${end_comment}\E/xx/g if $end_comment;
 | |
|                     $t .= $sub_token[1];
 | |
|                     $in_ml_string = 1;
 | |
|                 } else {
 | |
|                     print "Warning: rm_comments_in_string length \@sub_token > 2\n";
 | |
|                 }
 | |
| 
 | |
|             }
 | |
|             #printf "  t1 = [$t]\n";
 | |
|             $new_line .= $t;
 | |
|         }
 | |
|         push @save_lines, $new_line;
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", @save_lines), "]\n" if $opt_v > 4;
 | |
|     print "<- rm_comments_in_strings\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_between_general {                 # {{{1
 | |
|     my ($ra_lines, $start_marker, $end_marker, ) = @_;
 | |
|     # Start and end markers may be any length strings.
 | |
| 
 | |
|     print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     my $all_lines = join("", @{$ra_lines});
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/\Q$start_marker\E.*?\Q$end_marker\E//g;  # strip one-line comments
 | |
|         next if /^\s*$/;
 | |
|         if ($in_comment) {
 | |
|             if (/\Q$end_marker\E/) {
 | |
|                 s/^.*?\Q$end_marker\E//;
 | |
|                 $in_comment = 0;
 | |
|             }
 | |
|             next if $in_comment;
 | |
|         }
 | |
|         next if /^\s*$/;
 | |
|         $in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
 | |
|         next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | |
|         if ($in_comment) {
 | |
|             # part code, part comment; strip the comment and keep the code
 | |
|             s/^(.*?)\Q$start_marker\E.*$/$1/;
 | |
|         }
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_between_general\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_between_regex   {                 # {{{1
 | |
|     my ($ra_lines, $start_RE, $end_RE, ) = @_;
 | |
|     # Start and end regex's may be any length strings.
 | |
| 
 | |
|     print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     my $all_lines = join("", @{$ra_lines});
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/${start_RE}.*?${end_RE}//g;  # strip one-line comments
 | |
|         next if /^\s*$/;
 | |
|         if ($in_comment) {
 | |
|             if (/$end_RE/) {
 | |
|                 s/^.*?${end_RE}//;
 | |
|                 $in_comment = 0;
 | |
|             }
 | |
|             next if $in_comment;
 | |
|         }
 | |
|         next if /^\s*$/;
 | |
|         $in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
 | |
|         next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | |
|         if ($in_comment) {
 | |
|             # part code, part comment; strip the comment and keep the code
 | |
|             s/^(.*?)${start_RE}.*$/$1/;
 | |
|         }
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_between_regex\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub replace_regex  {                         # {{{1
 | |
|     my ($ra_lines, $regex, $replace, ) = @_;
 | |
| 
 | |
|     print "-> replace_regex(regex=$regex, replace=$replace)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     my $all_lines = join("", @{$ra_lines});
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/${regex}/${replace}/g;
 | |
|         next if /^\s*$/;
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- replace_regex\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub replace_between_regex  {                 # {{{1
 | |
|     my ($ra_lines, $start_RE, $end_RE, $replace_RE, $multiline_mode ) = @_;
 | |
|     # If multiline_mode is enabled, $replace_RE should not refer
 | |
|     # to any captured groups in $start_RE.
 | |
|     $multiline_mode = 1 if not defined $multiline_mode;
 | |
|     # Start and end regex's may be any length strings.
 | |
| 
 | |
|     print "-> replace_between_regex(start=$start_RE, end=$end_RE, replace=$replace_RE)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     my $all_lines = join("", @{$ra_lines});
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/${start_RE}.*?${end_RE}/${replace_RE}/eeg;  # strip one-line comments
 | |
|         next if /^\s*$/;
 | |
|         if ($in_comment) {
 | |
|             if (/$end_RE/) {
 | |
|                 s/^.*?${end_RE}/${replace_RE}/ee;
 | |
|                 $in_comment = 0;
 | |
|             }
 | |
|             next if $in_comment;
 | |
|         }
 | |
|         next if /^\s*$/;
 | |
|         $in_comment = 1 if $multiline_mode and /^(.*?)${start_RE}/ ; # $1 may be blank or code
 | |
|         next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
 | |
|         if ($in_comment) {
 | |
|             # part code, part comment; strip the comment and keep the code
 | |
|             s/^(.*?)${start_RE}.*$/$1/;
 | |
|         }
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     print "<- replace_between_regex\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_cobol_blanks {                    # {{{1
 | |
|     # subroutines derived from SLOCCount
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     my $free_format = 0;  # Support "free format" source code.
 | |
|     my @save_lines  = ();
 | |
| 
 | |
|     foreach (@{$ra_lines}) {
 | |
|         next if m/^\s*$/;
 | |
|         my $line = expand($_);  # convert tabs to equivalent spaces
 | |
|         $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
 | |
|         if ($free_format) {
 | |
|             push @save_lines, $_;
 | |
|         } else {
 | |
|             # Greg Toth:
 | |
|             #  (1) Treat lines with any alphanum in cols 1-6 and
 | |
|             #      blanks in cols 7 through 71 as blank line, and
 | |
|             #  (2) Treat lines with any alphanum in cols 1-6 and
 | |
|             #      slash (/) in col 7 as blank line (this is a
 | |
|             #      page eject directive).
 | |
|             push @save_lines, $_ unless m/^\d{6}\s*$/             or
 | |
|                                         ($line =~ m/^.{6}\s{66}/) or
 | |
|                                         ($line =~ m/^......\//);
 | |
|         }
 | |
|     }
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_cobol_comments {                  # {{{1
 | |
|     # subroutines derived from SLOCCount
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     my $free_format = 0;  # Support "free format" source code.
 | |
|     my @save_lines  = ();
 | |
| 
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
 | |
|         if ($free_format) {
 | |
|             push @save_lines, $_ unless m{^\s*\*};
 | |
|         } else {
 | |
|             push @save_lines, $_ unless m{^......\*} or m{^\*};
 | |
|         }
 | |
|     }
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_jcl_comments {                    # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     print "-> remove_jcl_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         next if /^\s*$/;
 | |
|         next if m{^//\*};
 | |
|         last if m{^\s*//\s*$};
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_jcl_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_jsp_comments {                    # {{{1
 | |
|     #  JSP comment is   <%--  body of comment   --%>
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     print "-> remove_jsp_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
 | |
|         next if /^\s*$/;
 | |
|         if ($in_comment) {
 | |
|             if (/\-\-\%>/) {
 | |
|                 s/^.*?\-\-\%>//;
 | |
|                 $in_comment = 0;
 | |
|             }
 | |
|         }
 | |
|         next if /^\s*$/;
 | |
|         $in_comment = 1 if /^(.*?)<\%\-\-/;
 | |
|         next if defined $1 and $1 =~ /^\s*$/;
 | |
|         next if ($in_comment);
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_jsp_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_html_comments {                   # {{{1
 | |
|     #  HTML comment is   <!--  body of comment   -->
 | |
|     #  Need to use my own routine until the HTML comment regex in
 | |
|     #  the Regexp::Common module can handle  <!--  --  -->
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     print "-> remove_html_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         s/<!\-\-.*?\-\->//g;  # strip one-line comments
 | |
|         next if /^\s*$/;
 | |
|         if ($in_comment) {
 | |
|             if (/\-\->/) {
 | |
|                 s/^.*?\-\->//;
 | |
|                 $in_comment = 0;
 | |
|             }
 | |
|         }
 | |
|         next if /^\s*$/;
 | |
|         $in_comment = 1 if /^(.*?)<!\-\-/;
 | |
|         if (defined $1 and $1 !~ /^\s*$/) {
 | |
|             # has both code and comment
 | |
|             push @save_lines, "$1\n";
 | |
|             next;
 | |
|         }
 | |
|         next if ($in_comment);
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_html_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_bf_comments {                     # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     print "-> remove_bf_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         s/[^<>+-.,\[\]]+//g;
 | |
|         next if /^\s*$/;
 | |
|         push @save_lines, $_;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_bf_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub really_is_bf {                           # {{{1
 | |
|     my ($file, ) = @_;
 | |
| 
 | |
|     print "-> really_is_bf\n" if $opt_v > 2;
 | |
|     my $n_bf_indicators  = 0;
 | |
|     my @lines = read_file($file);
 | |
|     foreach my $L (@lines) {
 | |
|         my $ind = 0;
 | |
|         if ($L =~ /([+-]{4,}  |          # at least four +'s or -'s in a row
 | |
|                    [\[\]]{4,} |          # at least four [ or ] in a row
 | |
|                    [<>][+-]   |          # >- or >+ or <+ or <-
 | |
|                    <{3,}      |          # at least three < in a row
 | |
|                    ^\s*[\[\]]\s*$)/x) {  # [ or ] on line by itself
 | |
|             ++$n_bf_indicators;
 | |
|             $ind = 1;
 | |
|         }
 | |
|         # if ($ind) { print "YES: $L"; } else { print "NO : $L"; }
 | |
|     }
 | |
|     my $ratio = scalar(@lines) > 0 ? $n_bf_indicators / scalar(@lines) : 0;
 | |
|     my $decision = ($ratio > 0.5) || ($n_bf_indicators > 5);
 | |
|     printf "<- really_is_bf(Y/N=%d %s, R=%.3f, N=%d)\n",
 | |
|             $decision, $file, $ratio, $n_bf_indicators if $opt_v > 2;
 | |
|     return $decision;
 | |
| } # 1}}}
 | |
| sub remove_indented_block {                  # {{{1
 | |
|     # Haml block comments are defined by a silent comment marker like
 | |
|     #    /
 | |
|     # or
 | |
|     #    -#
 | |
|     # followed by indented text on subsequent lines.
 | |
|     # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
 | |
|     my ($ra_lines, $regex, ) = @_;
 | |
| 
 | |
|     print "-> remove_indented_block\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         next if /^\s*$/;
 | |
|         my $line = expand($_);  # convert tabs to equivalent spaces
 | |
|         if ($in_comment) {
 | |
|             $line =~ /^(\s*)/;
 | |
|             # print "indent=", length $1, "\n";
 | |
|             if (length $1 < $in_comment) {
 | |
|                 # indent level is less than comment level
 | |
|                 # are back in code
 | |
|                 $in_comment = 0;
 | |
|             } else {
 | |
|                 # still in comments, don't use this line
 | |
|                 next;
 | |
|             }
 | |
|         } elsif ($line =~ m{$regex}) {
 | |
|             if ($1) {
 | |
|                 $in_comment = length($1) + 1; # number of leading spaces + 1
 | |
|             } else {
 | |
|                 $in_comment = 1;
 | |
|             }
 | |
|             # print "in_comment=$in_comment\n";
 | |
|             next;
 | |
|         }
 | |
|         push @save_lines, $line;
 | |
|     }
 | |
| 
 | |
|     print "<- remove_indented_block\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_haml_block {                      # {{{1
 | |
|     # Haml block comments are defined by a silent comment marker like
 | |
|     #    /
 | |
|     # or
 | |
|     #    -#
 | |
|     # followed by indented text on subsequent lines.
 | |
|     # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     return remove_indented_block($ra_lines, '^(\s*)(/|-#)\s*$');
 | |
| 
 | |
| } # 1}}}
 | |
| sub remove_pug_block {                       # {{{1
 | |
|     # Haml block comments are defined by a silent comment marker like
 | |
|     #    //
 | |
|     # followed by indented text on subsequent lines.
 | |
|     # http://jade-lang.com/reference/comments/
 | |
|     my ($ra_lines, ) = @_;
 | |
|     return remove_indented_block($ra_lines, '^(\s*)(//)\s*$');
 | |
| } # 1}}}
 | |
| sub remove_OCaml_comments {                  # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
| 
 | |
|     print "-> remove_OCaml_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;   # counter to depth of nested comments
 | |
|     foreach my $L (@{$ra_lines}) {
 | |
|         next if $L =~ /^\s*$/;
 | |
|         # make an array of tokens where a token is a start comment
 | |
|         # marker, end comment marker, string, or anything else
 | |
|         my $clean_line = ""; # ie, free of comments
 | |
|         my @tokens = split(/(\(\*|\*\)|".*?")/, $L);
 | |
|         foreach my $t (@tokens) {
 | |
|             next unless $t;
 | |
|             if      ($t eq "(*") {
 | |
|                 ++$in_comment;
 | |
|             } elsif ($t eq "*)") {
 | |
|                 --$in_comment;
 | |
|             } elsif (!$in_comment) {
 | |
|                 $clean_line .= $t;
 | |
|             }
 | |
|         }
 | |
|         push @save_lines, $clean_line if $clean_line;
 | |
|     }
 | |
|     print "<- remove_OCaml_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_TLAPlus_generated_code {          # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # If --no-autogen, remove code created by the PlusCal translator.
 | |
|     return @{$ra_lines} if !$opt_no_autogen;
 | |
|     return remove_between_regex($ra_lines, '^\\\\\\* BEGIN TRANSLATION\b', '^\\\\\\* END TRANSLATION\b');
 | |
| } # 1}}}
 | |
| sub remove_TLAPlus_comments {                # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # TLA+ block comments are like OCaml comments (remove_OCaml_comments).
 | |
|     # However, TLA+ comments may contain PlusCal code, and we must consider the
 | |
|     # PlusCal code as code, not comments.
 | |
| 
 | |
|     print "-> remove_TLAPlus_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;        # counter to depth of nested (* *) comments
 | |
|     my $in_pluscal_head = 0;   # whether we saw the start of PlusCal code
 | |
|     my $in_pluscal_block = 0;  # counter to depth of nested { } blocks
 | |
|     my $saved_in_comment = 0;  # $in_comment before we entered PlusCal code
 | |
|     foreach my $L (@{$ra_lines}) {
 | |
|         next if $L =~ /^\s*$/;
 | |
|         # make an array of interesting tokens we need to act upon
 | |
|         my $clean_line = ""; # ie, free of comments
 | |
|         my @tokens = split(/(\(\*|\*\)|".*?"|[{}]|--fair\b|--algorithm|PlusCal options)/, $L);
 | |
|         foreach my $t (@tokens) {
 | |
|             next unless $t;
 | |
|             if      ($t eq "(*") {
 | |
|                 ++$in_comment;
 | |
|             } elsif ($t eq "*)") {
 | |
|                 --$in_comment;
 | |
|             } elsif ($t eq "{" && $in_pluscal_head) {
 | |
|                 # start block matching when we see '{' in '--algorithm NAME {'
 | |
|                 $in_pluscal_head = 0;
 | |
|                 $in_pluscal_block = 1;
 | |
|                 $saved_in_comment = $in_comment;
 | |
|                 $in_comment = 0;
 | |
|                 $clean_line .= $t;
 | |
|             } elsif ($t eq "{" && $in_pluscal_block && !$in_comment) {
 | |
|                 ++$in_pluscal_block;
 | |
|                 $clean_line .= $t;
 | |
|             } elsif ($t eq "}" && $in_pluscal_block && !$in_comment) {
 | |
|                 --$in_pluscal_block;
 | |
|                 if ($in_pluscal_block == 0) {
 | |
|                     $in_comment = $saved_in_comment;
 | |
|                 }
 | |
|                 $clean_line .= $t;
 | |
|             } elsif ($t eq "--fair" || $t eq "--algorithm") {
 | |
|                 $in_pluscal_head = 1;
 | |
|                 $clean_line .= $t;
 | |
|             } elsif (!$in_comment || $in_pluscal_head || $in_pluscal_block || $t eq "PlusCal options") {
 | |
|                 $clean_line .= $t;
 | |
|             }
 | |
|         }
 | |
|         push @save_lines, $clean_line if $clean_line;
 | |
|     }
 | |
|     print "<- remove_TLAPlus_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub remove_slim_block {                      # {{{1
 | |
|     # slim comments start with /
 | |
|     # followed by indented text on subsequent lines.
 | |
|     # http://www.rubydoc.info/gems/slim/frames
 | |
|     my ($ra_lines, ) = @_;
 | |
|     return remove_indented_block($ra_lines, '^(\s*)(/[^!])');
 | |
| } # 1}}}
 | |
| sub add_newlines {                           # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     print "-> add_newlines \n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     foreach (@{$ra_lines}) {
 | |
| 
 | |
|         push @save_lines, "$_\n";
 | |
|     }
 | |
| 
 | |
|     print "<- add_newlines \n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub docstring_to_C {                         # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Converts Python docstrings to C comments.
 | |
| 
 | |
|     if ($opt_docstring_as_code) {
 | |
|         return @{$ra_lines};
 | |
|     }
 | |
| 
 | |
|     print "-> docstring_to_C()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_docstring = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         while (/((""")|('''))/) {
 | |
|             if (!$in_docstring) {
 | |
|                 s{[uU]?((""")|('''))}{/*};
 | |
|                 $in_docstring = 1;
 | |
|             } else {
 | |
|                 s{((""")|('''))}{*/};
 | |
|                 $in_docstring = 0;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- docstring_to_C\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub docstring_rm_comments {                  # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Remove embedded C/C++ style comments in docstrings.
 | |
| 
 | |
|     print "-> docstring_rm_comments()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_docstring = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if (/((""")|('''))(.*?)\1/) {
 | |
|             # single line docstring
 | |
|             my ($i_start, $i_end) = ($-[0]+3, $+[0]-3);
 | |
|             # replace /*, */, // with xx
 | |
|             substr($_, $i_start, $i_end-$i_start) =~ s{(/\*|\*/|//)}{xx}g;
 | |
|             next;
 | |
|         } elsif (m{/\*.*?((""")|(''')).*?\*/}) {
 | |
|             # docstring start or end within /* */ comments
 | |
|             my $i_start = $-[0]+2;
 | |
|             substr($_, $i_start, 3) = "xxx";
 | |
|         } elsif (m{//.*?((""")|('''))}) {
 | |
|             # docstring start or end after //
 | |
|             my $i_start = $-[0]+2;
 | |
|             substr($_, $i_start, 3) = "xxx";
 | |
|         } elsif (/^(.*?)((""")|('''))/ and  $in_docstring) {
 | |
|             $in_docstring = 0;
 | |
|             my $i_end = length $1;
 | |
|             if ($i_end) {
 | |
|                 substr($_, 0, $i_end) =~ s{(/\*|\*/|//)}{xx}g;
 | |
|             }
 | |
|         } elsif (/((""")|('''))(.*?)$/ and !$in_docstring) {
 | |
|             $in_docstring = 1;
 | |
|             my $i_start = $-[0]+3;
 | |
|             substr($_, $i_start) =~ s{(/\*|\*/|//)}{xx}g;
 | |
|         } elsif ($in_docstring) {
 | |
|             s{(/\*|\*/|//)}{xx}g;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     print "<- docstring_rm_comments\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub jupyter_nb {                             # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Translate .ipynb file content into an equivalent set of code
 | |
|     # lines as expected by cloc.
 | |
| 
 | |
|     print "-> jupyter_nb()\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_code   = 0;
 | |
|     my $in_source = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if (!$in_code and !$in_source and /^\s*"cell_type":\s*"code",\s*$/) {
 | |
|             $in_code = 1;
 | |
|         } elsif ($in_code and !$in_source and /^\s*"source":\s*\[\s*$/) {
 | |
|             $in_source = 1;
 | |
|         } elsif ($in_code and $in_source) {
 | |
|             if (/^\s*"\s*\\n",\s*$/) {    #  "\n",  -> empty line
 | |
|                 next;
 | |
|             } elsif (/^\s*"\s*#/) {       #  comment within the code block
 | |
|                 next;
 | |
|             } elsif (/^\s*\]\s*$/) {
 | |
|                 $in_code   = 0;
 | |
|                 $in_source = 0;
 | |
|             } else {
 | |
|                 push @save_lines, $_;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- jupyter_nb\n" if $opt_v > 2;
 | |
| 
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub elixir_doc_to_C {                        # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Converts Elixir docs to C comments.
 | |
| 
 | |
|     print "-> elixir_doc_to_C()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_docstring = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         if (!$in_docstring && /(\@(module)?doc\s+(~[sScC])?['"]{3})/) {
 | |
|             s{$1}{/*};
 | |
|             $in_docstring = 1;
 | |
|         } elsif ($in_docstring && /(['"]{3})/) {
 | |
|             s{$1}{*/};
 | |
|             $in_docstring = 0;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- elixir_doc_to_C\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub Forth_paren_to_C  {                      # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Converts Forth comment parentheses to C comments.
 | |
| 
 | |
|     print "-> Forth_paren_to_C()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_comment = 0;
 | |
|     my $max_paren_pair_per_line = 255;
 | |
|     foreach (@{$ra_lines}) {
 | |
| #print "Forth_paren_to_C: [$_]\n";
 | |
|         my $n_iter = 0;
 | |
|         while (/\s\(\s/ or ($in_comment and /\)/)) {
 | |
| #print "TOP n_iter=$n_iter in_comment=$in_comment\n";
 | |
|             if (/\s\(\s.*?\)/) {
 | |
|                 # in-line parenthesis comment; handle here
 | |
|                 s/\s+\(\s+.*?\)//g;
 | |
| #print "B\n";
 | |
|             } elsif (!$in_comment and /\s\(\s/) {
 | |
|                 s{\s+\(\s+}{/*};
 | |
| #print "C\n";
 | |
|                 $in_comment = 1;
 | |
|             } elsif ($in_comment and /\)/) {
 | |
|                 s{\)}{*/};
 | |
| #print "D\n";
 | |
|                 $in_comment = 0;
 | |
|             } else {
 | |
|                 # gets here if it can't find a matching
 | |
|                 # close parenthesis; in this case the
 | |
|                 # results will likely be incorrect
 | |
|                 ++$n_iter;
 | |
| #print "E\n";
 | |
|                 last if $n_iter > $max_paren_pair_per_line;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- Forth_paren_to_C\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub powershell_to_C {                        # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Converts PowerShell block comment markers to C comments.
 | |
| 
 | |
|     print "-> powershell_to_C()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $in_docstring = 0;
 | |
|     foreach (@{$ra_lines}) {
 | |
|         s{<#}{/*}g;
 | |
|         s{#>}{*/}g;
 | |
|     }
 | |
| 
 | |
|     print "<- powershell_to_C\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub smarty_to_C {                            # {{{1
 | |
|     my ($ra_lines, ) = @_;
 | |
|     # Converts Smarty comments to C comments.
 | |
| 
 | |
|     print "-> smarty_to_C()\n" if $opt_v > 2;
 | |
| 
 | |
|     foreach (@{$ra_lines}) {
 | |
|         s[{\*][/*]g;
 | |
|         s[\*}][*/]g;
 | |
|     }
 | |
| 
 | |
|     print "<- smarty_to_C\n" if $opt_v > 2;
 | |
|     return @{$ra_lines};
 | |
| } # 1}}}
 | |
| sub determine_lit_type {                     # {{{1
 | |
|   my ($file) = @_;
 | |
| 
 | |
|   my $FILE = open_file('<', $file, 0);
 | |
|   while (<$FILE>) {
 | |
|     if (m/^\\begin\{code\}/) { close $FILE; return 2; }
 | |
|     if (m/^>\s/) { close $FILE; return 1; }
 | |
|   }
 | |
| 
 | |
|   return 0;
 | |
| } # 1}}}
 | |
| sub remove_haskell_comments {                # {{{1
 | |
|     # SLOCCount's haskell_count script with modifications to handle
 | |
|     # Elm empty and nested block comments.
 | |
|     # Strips out {- .. -} and -- comments and counts the rest.
 | |
|     # Pragmas, {-#...}, are counted as SLOC.
 | |
|     # BUG: Doesn't handle strings with embedded block comment markers gracefully.
 | |
|     #      In practice, that shouldn't be a problem.
 | |
|     my ($ra_lines, $file, ) = @_;
 | |
| 
 | |
|     print "-> remove_haskell_comments\n" if $opt_v > 2;
 | |
| 
 | |
|     my @save_lines = ();
 | |
|     my $in_comment = 0;
 | |
|     my $incomment  = 0;
 | |
|     my $inlitblock = 0;
 | |
|     my $literate   = 0;
 | |
|     my $is_elm     = 0;
 | |
| 
 | |
|     $is_elm   = 1 if $file =~ /\.elm$/;
 | |
|     $literate = 1 if $file =~ /\.lhs$/;
 | |
|     if ($literate) { $literate = determine_lit_type($file) }
 | |
| 
 | |
|     foreach (@{$ra_lines}) {
 | |
|         chomp;
 | |
|         if ($literate == 1) {
 | |
|             if (!s/^>//) { s/.*//; }
 | |
|         } elsif ($literate == 2) {
 | |
|             if ($inlitblock) {
 | |
|                 if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
 | |
|             } elsif (!$inlitblock) {
 | |
|                 if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
 | |
|                 else { s/.*//; }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         # keep pragmas
 | |
|         if (/^\s*{-#/) {
 | |
|             push @save_lines, $_;
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         # Elm allows nested comments so track nesting depth
 | |
|         # with $incomment.
 | |
| 
 | |
|         my $n_open  = () = $_ =~ /{-/g;
 | |
|         my $n_close = () = $_ =~ /-}/g;
 | |
|         s/{-.*?-}//g;
 | |
| 
 | |
|         if ($incomment) {
 | |
|             if (m/\-\}/) {
 | |
|                 s/^.*?\-\}//;
 | |
|                 if ($is_elm) {
 | |
|                     $incomment += $n_open - $n_close;
 | |
|                 } else {
 | |
|                     $incomment = 0;
 | |
|                 }
 | |
|             } else {
 | |
|                 s/.*//;
 | |
|             }
 | |
|         } else {
 | |
|             s/--.*//;
 | |
|             if (m/{-/ && (!m/{-#/)) {
 | |
|                 s/{-.*//;
 | |
|                 if ($is_elm) {
 | |
|                     $incomment += $n_open - $n_close;
 | |
|                 } else {
 | |
|                     $incomment = 1;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         if (m/\S/) { push @save_lines, $_; }
 | |
|     }
 | |
| #   if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
 | |
| 
 | |
|     print "<- remove_haskell_comments\n" if $opt_v > 2;
 | |
|     return @save_lines;
 | |
| } # 1}}}
 | |
| sub print_lines {                            # {{{1
 | |
|     my ($file     , # in
 | |
|         $title    , # in
 | |
|         $ra_lines , # in
 | |
|        ) = @_;
 | |
|     printf "->%-30s %s\n", $file, $title;
 | |
|     for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
 | |
|         printf "%5d | %s", $i+1, $ra_lines->[$i];
 | |
|         print "\n" unless $ra_lines->[$i] =~ m{\n$}
 | |
|     }
 | |
| } # 1}}}
 | |
| sub set_constants {                          # {{{1
 | |
|     my ($rh_Language_by_Extension , # out
 | |
|         $rh_Language_by_Script    , # out
 | |
|         $rh_Language_by_File      , # out
 | |
|         $rh_Language_by_Prefix    , # out
 | |
|         $rhaa_Filters_by_Language , # out
 | |
|         $rh_Not_Code_Extension    , # out
 | |
|         $rh_Not_Code_Filename     , # out
 | |
|         $rh_Scale_Factor          , # out
 | |
|         $rh_Known_Binary_Archives , # out
 | |
|         $rh_EOL_continuation_re   , # out
 | |
|        ) = @_;
 | |
| # 1}}}
 | |
| %{$rh_Language_by_Extension} = (             # {{{1
 | |
|             'abap'        => 'ABAP'                  ,
 | |
|             'ac'          => 'm4'                    ,
 | |
|             'ada'         => 'Ada'                   ,
 | |
|             'adb'         => 'Ada'                   ,
 | |
|             'ads'         => 'Ada'                   ,
 | |
|             'adso'        => 'ADSO/IDSM'             ,
 | |
|             'ahkl'        => 'AutoHotkey'            ,
 | |
|             'ahk'         => 'AutoHotkey'            ,
 | |
|             'agda'        => 'Agda'                  ,
 | |
|             'lagda'       => 'Agda'                  ,
 | |
|             'aj'          => 'AspectJ'               ,
 | |
|             'am'          => 'make'                  ,
 | |
|             'ample'       => 'AMPLE'                 ,
 | |
|             'apl'         => 'APL'                   ,
 | |
|             'apla'        => 'APL'                   ,
 | |
|             'aplf'        => 'APL'                   ,
 | |
|             'aplo'        => 'APL'                   ,
 | |
|             'apln'        => 'APL'                   ,
 | |
|             'aplc'        => 'APL'                   ,
 | |
|             'apli'        => 'APL'                   ,
 | |
|             'applescript' => 'AppleScript'           ,
 | |
|             'dyalog'      => 'APL'                   ,
 | |
|             'dyapp'       => 'APL'                   ,
 | |
|             'mipage'      => 'APL'                   ,
 | |
|             'art'         => 'Arturo'                ,
 | |
|             'as'          => 'ActionScript'          ,
 | |
|             'adoc'        => 'AsciiDoc'              ,
 | |
|             'asciidoc'    => 'AsciiDoc'              ,
 | |
|             'dofile'      => 'AMPLE'                 ,
 | |
|             'startup'     => 'AMPLE'                 ,
 | |
|             'axd'         => 'ASP'                   ,
 | |
|             'ashx'        => 'ASP'                   ,
 | |
|             'asa'         => 'ASP'                   ,
 | |
|             'asax'        => 'ASP.NET'               ,
 | |
|             'ascx'        => 'ASP.NET'               ,
 | |
|             'asd'         => 'Lisp'                  , # system definition file
 | |
|             'asmx'        => 'ASP.NET'               ,
 | |
|             'asp'         => 'ASP'                   ,
 | |
|             'aspx'        => 'ASP.NET'               ,
 | |
|             'master'      => 'ASP.NET'               ,
 | |
|             'sitemap'     => 'ASP.NET'               ,
 | |
|             'nasm'        => 'Assembly'              ,
 | |
|             'a51'         => 'Assembly'              ,
 | |
|             'asm'         => 'Assembly'              ,
 | |
|             'astro'       => 'Astro'                 ,
 | |
|             'asy'         => 'Asymptote'             ,
 | |
|             'cshtml'      => 'Razor'                 ,
 | |
|             'razor'       => 'Razor'                 , # Client-side Blazor
 | |
|             'nawk'        => 'awk'                   ,
 | |
|             'mawk'        => 'awk'                   ,
 | |
|             'gawk'        => 'awk'                   ,
 | |
|             'auk'         => 'awk'                   ,
 | |
|             'awk'         => 'awk'                   ,
 | |
|             'bash'        => 'Bourne Again Shell'    ,
 | |
|             'bazel'       => 'Starlark'              ,
 | |
|             'BUILD'       => 'Bazel'                 ,
 | |
|             'dxl'         => 'DOORS Extension Language',
 | |
|             'bat'         => 'DOS Batch'             ,
 | |
|             'BAT'         => 'DOS Batch'             ,
 | |
|             'cmd'         => 'DOS Batch'             ,
 | |
|             'CMD'         => 'DOS Batch'             ,
 | |
|             'btm'         => 'DOS Batch'             ,
 | |
|             'BTM'         => 'DOS Batch'             ,
 | |
|             'blade'       => 'Blade'                 ,
 | |
|             'blade.php'   => 'Blade'                 ,
 | |
|             'build.xml'   => 'Ant'                   ,
 | |
|             'b'           => 'Brainfuck'             ,
 | |
|             'bf'          => 'Brainfuck'             ,
 | |
|             'brs'         => 'BrightScript'          ,
 | |
|             'bzl'         => 'Starlark'              ,
 | |
|             'btp'         => 'BizTalk Pipeline'      ,
 | |
|             'odx'         => 'BizTalk Orchestration' ,
 | |
|             'carbon'      => 'Carbon'                ,
 | |
|             'cpy'         => 'COBOL'                 ,
 | |
|             'cobol'       => 'COBOL'                 ,
 | |
|             'ccp'         => 'COBOL'                 ,
 | |
|             'cbl'         => 'COBOL'                 ,
 | |
|             'CBL'         => 'COBOL'                 ,
 | |
|             'idc'         => 'C'                     ,
 | |
|             'cats'        => 'C'                     ,
 | |
|             'c'           => 'C'                     ,
 | |
|             'c++'         => 'C++'                   ,
 | |
|             'C'           => 'C++'                   ,
 | |
|             'cc'          => 'C++'                   ,
 | |
|             'ccm'         => 'C++'                   ,
 | |
|             'c++m'        => 'C++'                   ,
 | |
|             'cppm'        => 'C++'                   ,
 | |
|             'cxxm'        => 'C++'                   ,
 | |
|             'h++'         => 'C++'                   ,
 | |
|             'inl'         => 'C++'                   ,
 | |
|             'ipp'         => 'C++'                   ,
 | |
|             'ixx'         => 'C++'                   ,
 | |
|             'tcc'         => 'C++'                   ,
 | |
|             'tpp'         => 'C++'                   ,
 | |
|             'ccs'         => 'CCS'                   ,
 | |
|             'cfc'         => 'ColdFusion CFScript'   ,
 | |
|             'cfml'        => 'ColdFusion'            ,
 | |
|             'cfm'         => 'ColdFusion'            ,
 | |
|             'chpl'        => 'Chapel'                ,
 | |
|             'cl'          => 'Lisp/OpenCL'           ,
 | |
|             'riemann.config'=> 'Clojure'               ,
 | |
|             'hic'         => 'Clojure'               ,
 | |
|             'cljx'        => 'Clojure'               ,
 | |
|             'cljscm'      => 'Clojure'               ,
 | |
|             'cljs.hl'     => 'Clojure'               ,
 | |
|             'cl2'         => 'Clojure'               ,
 | |
|             'boot'        => 'Clojure'               ,
 | |
|             'clj'         => 'Clojure'               ,
 | |
|             'cljs'        => 'ClojureScript'         ,
 | |
|             'cljc'        => 'ClojureC'              ,
 | |
|             'cls'         => 'Visual Basic/TeX/Apex Class' ,
 | |
|             'cmake.in'    => 'CMake'                 ,
 | |
|             'CMakeLists.txt' => 'CMake'              ,
 | |
|             'cmake'       => 'CMake'                 ,
 | |
|             'cob'         => 'COBOL'                 ,
 | |
|             'COB'         => 'COBOL'                 ,
 | |
|             'cocoa5'      => 'CoCoA 5'               ,
 | |
|             'c5'          => 'CoCoA 5'               ,
 | |
|             'cpkg5'       => 'CoCoA 5'               ,
 | |
|             'cocoa5server'=> 'CoCoA 5'               ,
 | |
|             'iced'        => 'CoffeeScript'          ,
 | |
|             'cjsx'        => 'CoffeeScript'          ,
 | |
|             'cakefile'    => 'CoffeeScript'          ,
 | |
|             '_coffee'     => 'CoffeeScript'          ,
 | |
|             'coffee'      => 'CoffeeScript'          ,
 | |
|             'component'   => 'Visualforce Component' ,
 | |
|             'cg3'         => 'Constraint Grammar'    ,
 | |
|             'rlx'         => 'Constraint Grammar'    ,
 | |
|             'Containerfile'  => 'Containerfile'      ,
 | |
|             'cpp'         => 'C++'                   ,
 | |
|             'CPP'         => 'C++'                   ,
 | |
|             'cr'          => 'Crystal'               ,
 | |
|             'cs'          => 'C#/Smalltalk'          ,
 | |
|             'designer.cs' => 'C# Designer'           ,
 | |
|             'cake'        => 'Cake Build Script'     ,
 | |
|             'csh'         => 'C Shell'               ,
 | |
|             'cson'        => 'CSON'                  ,
 | |
|             'css'         => "CSS"                   ,
 | |
|             'csv'         => "CSV"                   ,
 | |
|             'cu'          => 'CUDA'                  ,
 | |
|             'cuh'         => 'CUDA'                  , # CUDA header file
 | |
|             'cxx'         => 'C++'                   ,
 | |
|             'd'           => 'D/dtrace'              ,
 | |
| # in addition, .d can map to init.d files typically written as
 | |
| # bash or sh scripts
 | |
|             'dfy'         => 'Dafny'                 ,
 | |
|             'da'          => 'DAL'                   ,
 | |
|             'dart'        => 'Dart'                  ,
 | |
|             'dsc'         => 'DenizenScript'         ,
 | |
|             'derw'        => 'Derw'                  ,
 | |
|             'def'         => 'Windows Module Definition',
 | |
|             'dhall'       => 'dhall'                 ,
 | |
|             'dt'          => 'DIET'                  ,
 | |
|             'patch'       => 'diff'                  ,
 | |
|             'diff'        => 'diff'                  ,
 | |
|             'dmap'        => 'NASTRAN DMAP'          ,
 | |
|             'sthlp'       => 'Stata'                 ,
 | |
|             'matah'       => 'Stata'                 ,
 | |
|             'mata'        => 'Stata'                 ,
 | |
|             'ihlp'        => 'Stata'                 ,
 | |
|             'doh'         => 'Stata'                 ,
 | |
|             'ado'         => 'Stata'                 ,
 | |
|             'do'          => 'Stata'                 ,
 | |
|             'DO'          => 'Stata'                 ,
 | |
|             'Dockerfile'  => 'Dockerfile'            ,
 | |
|             'dockerfile'  => 'Dockerfile'            ,
 | |
|             'pascal'      => 'Pascal'                ,
 | |
|             'lpr'         => 'Pascal'                ,
 | |
|             'dfm'         => 'Delphi Form'           ,
 | |
|             'dpr'         => 'Pascal'                ,
 | |
|             'dita'        => 'DITA'                  ,
 | |
|             'drl'         => 'Drools'                ,
 | |
|             'dtd'         => 'DTD'                   ,
 | |
|             'ec'          => 'C'                     ,
 | |
|             'ecpp'        => 'ECPP'                  ,
 | |
|             'eex'         => 'EEx'                   ,
 | |
|             'el'          => 'Lisp'                  ,
 | |
|             'elm'         => 'Elm'                   ,
 | |
|             'exs'         => 'Elixir'                ,
 | |
|             'ex'          => 'Elixir'                ,
 | |
|             'ecr'         => 'Embedded Crystal'      ,
 | |
|             'ejs'         => 'EJS'                   ,
 | |
|             'erb'         => 'ERB'                   ,
 | |
|             'ERB'         => 'ERB'                   ,
 | |
|             'ets'         => 'ArkTs'                 , # OpenHarmonyOS app language
 | |
|             'yrl'         => 'Erlang'                ,
 | |
|             'xrl'         => 'Erlang'                ,
 | |
|             'rebar.lock'  => 'Erlang'                ,
 | |
|             'rebar.config.lock'=> 'Erlang'           ,
 | |
|             'rebar.config'=> 'Erlang'                ,
 | |
|             'emakefile'   => 'Erlang'                ,
 | |
|             'app.src'     => 'Erlang'                ,
 | |
|             'erl'         => 'Erlang'                ,
 | |
|             'exp'         => 'Expect'                ,
 | |
|             '4th'         => 'Forth'                 ,
 | |
|             'fish'        => 'Fish Shell'            ,
 | |
|             'fsl'         => 'Finite State Language' ,
 | |
|             'jssm'        => 'Finite State Language' ,
 | |
|             'fnl'         => 'Fennel'                ,
 | |
|             'forth'       => 'Forth'                 ,
 | |
|             'fr'          => 'Forth'                 ,
 | |
|             'frt'         => 'Forth'                 ,
 | |
|             'fth'         => 'Forth'                 ,
 | |
|             'f83'         => 'Forth'                 ,
 | |
|             'fb'          => 'Forth'                 ,
 | |
|             'fpm'         => 'Forth'                 ,
 | |
|             'e4'          => 'Forth'                 ,
 | |
|             'rx'          => 'Forth'                 ,
 | |
|             'ft'          => 'Forth'                 ,
 | |
|             'f77'         => 'Fortran 77'            ,
 | |
|             'F77'         => 'Fortran 77'            ,
 | |
|             'f90'         => 'Fortran 90'            ,
 | |
|             'F90'         => 'Fortran 90'            ,
 | |
|             'f95'         => 'Fortran 95'            ,
 | |
|             'F95'         => 'Fortran 95'            ,
 | |
|             'f'           => 'Fortran 77/Forth'      ,
 | |
|             'F'           => 'Fortran 77'            ,
 | |
|             'for'         => 'Fortran 77/Forth'      ,
 | |
|             'FOR'         => 'Fortran 77'            ,
 | |
|             'ftl'         => 'Freemarker Template'   ,
 | |
|             'ftn'         => 'Fortran 77'            ,
 | |
|             'FTN'         => 'Fortran 77'            ,
 | |
|             'fmt'         => 'Oracle Forms'          ,
 | |
|             'focexec'     => 'Focus'                 ,
 | |
|             'fs'          => 'F#/Forth'              ,
 | |
|             'fsi'         => 'F#'                    ,
 | |
|             'fsx'         => 'F# Script'             ,
 | |
|             'fut'         => 'Futhark'               ,
 | |
|             'fxml'        => 'FXML'                  ,
 | |
|             'gnumakefile' => 'make'                  ,
 | |
|             'Gnumakefile' => 'make'                  ,
 | |
|             'gd'          => 'GDScript'              ,
 | |
|             'gdshader'    => 'Godot Shaders'         ,
 | |
|             'vshader'     => 'GLSL'                  ,
 | |
|             'vsh'         => 'GLSL'                  ,
 | |
|             'vrx'         => 'GLSL'                  ,
 | |
|             'gshader'     => 'GLSL'                  ,
 | |
|             'glslv'       => 'GLSL'                  ,
 | |
|             'geo'         => 'GLSL'                  ,
 | |
|             'fshader'     => 'GLSL'                  ,
 | |
|             'fsh'         => 'GLSL'                  ,
 | |
|             'frg'         => 'GLSL'                  ,
 | |
|             'fp'          => 'GLSL'                  ,
 | |
|             'fbs'         => 'Flatbuffers'           ,
 | |
|             'gjs'         => 'Glimmer JavaScript'    ,
 | |
|             'gts'         => 'Glimmer TypeScript'    ,
 | |
|             'glsl'        => 'GLSL'                  ,
 | |
|             'graphqls'    => 'GraphQL'               ,
 | |
|             'gql'         => 'GraphQL'               ,
 | |
|             'graphql'     => 'GraphQL'               ,
 | |
|             'vert'        => 'GLSL'                  ,
 | |
|             'tesc'        => 'GLSL'                  ,
 | |
|             'tese'        => 'GLSL'                  ,
 | |
|             'geom'        => 'GLSL'                  ,
 | |
|             'feature'     => 'Cucumber'              ,
 | |
|             'frag'        => 'GLSL'                  ,
 | |
|             'comp'        => 'GLSL'                  ,
 | |
|             'g'           => 'ANTLR Grammar'         ,
 | |
|             'g4'          => 'ANTLR Grammar'         ,
 | |
|             'gleam'       => 'Gleam'                 ,
 | |
|             'go'          => 'Go'                    ,
 | |
|             'ʕ◔ϖ◔ʔ'       => 'Go'                    ,
 | |
|             'gsp'         => 'Grails'                ,
 | |
|             'jenkinsfile' => 'Groovy'                ,
 | |
|             'gvy'         => 'Groovy'                ,
 | |
|             'gtpl'        => 'Groovy'                ,
 | |
|             'grt'         => 'Groovy'                ,
 | |
|             'groovy'      => 'Groovy'                ,
 | |
|             'gant'        => 'Groovy'                ,
 | |
|             'gradle'      => 'Gradle'                ,
 | |
|             'gradle.kts'  => 'Gradle'                ,
 | |
|             'h'           => 'C/C++ Header'          ,
 | |
|             'H'           => 'C/C++ Header'          ,
 | |
|             'hh'          => 'C/C++ Header'          ,
 | |
|             'hpp'         => 'C/C++ Header'          ,
 | |
|             'hxx'         => 'C/C++ Header'          ,
 | |
|             'hb'          => 'Harbour'               ,
 | |
|             'hrl'         => 'Erlang'                ,
 | |
|             'hsc'         => 'Haskell'               ,
 | |
|             'hs'          => 'Haskell'               ,
 | |
|             'tfvars'      => 'HCL'                   ,
 | |
|             'hcl'         => 'HCL'                   ,
 | |
|             'tf'          => 'HCL'                   ,
 | |
|             'nomad'       => 'HCL'                   ,
 | |
|             'hlsli'       => 'HLSL'                  ,
 | |
|             'fxh'         => 'HLSL'                  ,
 | |
|             'hlsl'        => 'HLSL'                  ,
 | |
|             'shader'      => 'HLSL'                  ,
 | |
|             'cg'          => 'HLSL'                  ,
 | |
|             'cginc'       => 'HLSL'                  ,
 | |
|             'haml.deface' => 'Haml'                  ,
 | |
|             'haml'        => 'Haml'                  ,
 | |
|             'handlebars'  => 'Handlebars'            ,
 | |
|             'hbs'         => 'Handlebars'            ,
 | |
|             'ha'          => 'Hare'                  ,
 | |
|             'hxsl'        => 'Haxe'                  ,
 | |
|             'hx'          => 'Haxe'                  ,
 | |
|             'HC'          => 'HolyC'                 ,
 | |
|             'hoon'        => 'Hoon'                  ,
 | |
|             'xht'         => 'HTML'                  ,
 | |
|             'html.hl'     => 'HTML'                  ,
 | |
|             'htm'         => 'HTML'                  ,
 | |
|             'html'        => 'HTML'                  ,
 | |
|             'heex'        => 'HTML EEx'              ,
 | |
|             'i3'          => 'Modula3'               ,
 | |
|             'ice'         => 'Slice'                 ,
 | |
|             'icl'         => 'Clean'                 ,
 | |
|             'dcl'         => 'Clean'                 ,
 | |
|             'dlm'         => 'IDL'                   ,
 | |
|             'idl'         => 'IDL'                   ,
 | |
|             'idr'         => 'Idris'                 ,
 | |
|             'lidr'        => 'Literate Idris'        ,
 | |
|             'imba'        => 'Imba'                  ,
 | |
|             'prefs'       => 'INI'                   ,
 | |
|             'lektorproject'=> 'INI'                  ,
 | |
|             'buildozer.spec'=> 'INI'                 ,
 | |
|             'ini'         => 'INI'                   ,
 | |
|             'editorconfig'=> 'INI'                   ,
 | |
|             'ism'         => 'InstallShield'         ,
 | |
|             'ipl'         => 'IPL'                   ,
 | |
|             'pro'         => 'IDL/Qt Project/Prolog/ProGuard' ,
 | |
|             'ig'          => 'Modula3'               ,
 | |
|             'il'          => 'SKILL'                 ,
 | |
|             'ils'         => 'SKILL++'               ,
 | |
|             'inc'         => 'PHP/Pascal/Fortran'    ,
 | |
|             'ino'         => 'Arduino Sketch'        ,
 | |
|             'ipf'         => 'Igor Pro'              ,
 | |
|            #'pde'         => 'Arduino Sketch'        , # pre 1.0
 | |
|             'pde'         => 'Processing'            , # pre 1.0
 | |
|             'itk'         => 'Tcl/Tk'                ,
 | |
|             'java'        => 'Java'                  ,
 | |
|             'jcl'         => 'JCL'                   , # IBM Job Control Lang.
 | |
|             'jl'          => 'Lisp/Julia'            ,
 | |
|             'jai'         => 'Jai'                   ,
 | |
|             'janet'       => 'Janet'                 ,
 | |
|             'xsjslib'     => 'JavaScript'            ,
 | |
|             'xsjs'        => 'JavaScript'            ,
 | |
|             'ssjs'        => 'JavaScript'            ,
 | |
|             'sjs'         => 'JavaScript'            ,
 | |
|             'pac'         => 'JavaScript'            ,
 | |
|             'njs'         => 'JavaScript'            ,
 | |
|             'mjs'         => 'JavaScript'            ,
 | |
|             'cjs'         => 'JavaScript'            ,
 | |
|             'jss'         => 'JavaScript'            ,
 | |
|             'jsm'         => 'JavaScript'            ,
 | |
|             'jsfl'        => 'JavaScript'            ,
 | |
|             'jscad'       => 'JavaScript'            ,
 | |
|             'jsb'         => 'JavaScript'            ,
 | |
|             'jakefile'    => 'JavaScript'            ,
 | |
|             'jake'        => 'JavaScript'            ,
 | |
|             'bones'       => 'JavaScript'            ,
 | |
|             '_js'         => 'JavaScript'            ,
 | |
|             'js'          => 'JavaScript'            ,
 | |
|             'es6'         => 'JavaScript'            ,
 | |
|             'jsf'         => 'JavaServer Faces'      ,
 | |
|             'jsx'         => 'JSX'                   ,
 | |
|             'xhtml'       => 'XHTML'                 ,
 | |
|             'j2'          => 'Jinja Template'        ,
 | |
|             'jinja'       => 'Jinja Template'        ,
 | |
|             'jinja2'      => 'Jinja Template'        ,
 | |
|             'yyp'         => 'JSON'                  ,
 | |
|             'webmanifest' => 'JSON'                  ,
 | |
|             'webapp'      => 'JSON'                  ,
 | |
|             'topojson'    => 'JSON'                  ,
 | |
|             'tfstate.backup'=> 'JSON'                  ,
 | |
|             'tfstate'     => 'JSON'                  ,
 | |
|             'mcmod.info'  => 'JSON'                  ,
 | |
|             'mcmeta'      => 'JSON'                  ,
 | |
|             'json-tmlanguage'=> 'JSON'                  ,
 | |
|             'jsonl'       => 'JSON'                  ,
 | |
|             'har'         => 'JSON'                  ,
 | |
|             'gltf'        => 'JSON'                  ,
 | |
|             'geojson'     => 'JSON'                  ,
 | |
|             'composer.lock'=> 'JSON'                  ,
 | |
|             'avsc'        => 'JSON'                  ,
 | |
|             'watchmanconfig'=> 'JSON'                  ,
 | |
|             'tern-project'=> 'JSON'                  ,
 | |
|             'tern-config' => 'JSON'                  ,
 | |
|             'htmlhintrc'  => 'JSON'                  ,
 | |
|             'arcconfig'   => 'JSON'                  ,
 | |
|             'json'        => 'JSON'                  ,
 | |
|             'json5'       => 'JSON5'                 ,
 | |
|             'jsp'         => 'JSP'                   , # Java server pages
 | |
|             'jspf'        => 'JSP'                   , # Java server pages
 | |
|             'junos'       => 'Juniper Junos'         ,
 | |
|             'vm'          => 'Velocity Template Language' ,
 | |
|             'kv'          => 'kvlang'                ,
 | |
|             'ksc'         => 'Kermit'                ,
 | |
|             'ksh'         => 'Korn Shell'            ,
 | |
|             'ktm'         => 'Kotlin'                ,
 | |
|             'kt'          => 'Kotlin'                ,
 | |
|             'kts'         => 'Kotlin'                ,
 | |
|             'hlean'       => 'Lean'                  ,
 | |
|             'lean'        => 'Lean'                  ,
 | |
|             'lhs'         => 'Haskell'               ,
 | |
|             'lex'         => 'lex'                   ,
 | |
|             'l'           => 'lex'                   ,
 | |
|             'ld'          => 'Linker Script'         ,
 | |
|             'lem'         => 'Lem'                   ,
 | |
|             'less'        => 'LESS'                  ,
 | |
|             'lfe'         => 'LFE'                   ,
 | |
|             'liquid'      => 'liquid'                ,
 | |
|             'lsp'         => 'Lisp'                  ,
 | |
|             'lisp'        => 'Lisp'                  ,
 | |
|             'll'          => 'LLVM IR'               ,
 | |
|             'lgt'         => 'Logtalk'               ,
 | |
|             'logtalk'     => 'Logtalk'               ,
 | |
|             'lp'          => 'AnsProlog'             ,  # Answer Set Programming / clingo
 | |
|             'wlua'        => 'Lua'                   ,
 | |
|             'rbxs'        => 'Lua'                   ,
 | |
|             'pd_lua'      => 'Lua'                   ,
 | |
|             'p8'          => 'Lua'                   ,
 | |
|             'nse'         => 'Lua'                   ,
 | |
|             'lua'         => 'Lua'                   ,
 | |
|             'luau'        => 'Luau'                  ,
 | |
|             'm3'          => 'Modula3'               ,
 | |
|             'm4'          => 'm4'                    ,
 | |
|             'makefile'    => 'make'                  ,
 | |
|             'Makefile'    => 'make'                  ,
 | |
|             'mao'         => 'Mako'                  ,
 | |
|             'mako'        => 'Mako'                  ,
 | |
|             'workbook'    => 'Markdown'              ,
 | |
|             'ronn'        => 'Markdown'              ,
 | |
|             'mkdown'      => 'Markdown'              ,
 | |
|             'mkdn'        => 'Markdown'              ,
 | |
|             'mkd'         => 'Markdown'              ,
 | |
|             'mdx'         => 'Markdown'              ,
 | |
|             'mdwn'        => 'Markdown'              ,
 | |
|             'mdown'       => 'Markdown'              ,
 | |
|             'markdown'    => 'Markdown'              ,
 | |
|             'contents.lr' => 'Markdown'              ,
 | |
|             'md'          => 'Markdown'              ,
 | |
|             'mc'          => 'Windows Message File'  ,
 | |
|             'met'         => 'Teamcenter met'        ,
 | |
|             'mg'          => 'Modula3'               ,
 | |
|             'mojom'       => 'Mojom'                 ,
 | |
|             'mojo'        => 'Mojo'                  ,
 | |
|             '🔥'          => 'Mojo'                  ,
 | |
|             'meson.build' => 'Meson'                 ,
 | |
|             'metal'       => 'Metal'                 ,
 | |
|             'mk'          => 'make'                  ,
 | |
| #           'mli'         => 'ML'                    , # ML not implemented
 | |
| #           'ml'          => 'ML'                    ,
 | |
|             'ml4'         => 'OCaml'                 ,
 | |
|             'eliomi'      => 'OCaml'                 ,
 | |
|             'eliom'       => 'OCaml'                 ,
 | |
|             'ml'          => 'OCaml'                 ,
 | |
|             'mli'         => 'OCaml'                 ,
 | |
|             'mly'         => 'OCaml'                 ,
 | |
|             'mll'         => 'OCaml'                 ,
 | |
|             'm'           => 'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' ,
 | |
|             'mm'          => 'Objective-C++'         ,
 | |
|             'msg'         => 'Gencat NLS'            ,
 | |
|             'nbp'         => 'Mathematica'           ,
 | |
|             'mathematica' => 'Mathematica'           ,
 | |
|             'ma'          => 'Mathematica'           ,
 | |
|             'cdf'         => 'Mathematica'           ,
 | |
|             'mt'          => 'Mathematica'           ,
 | |
|             'wl'          => 'Mathematica'           ,
 | |
|             'wlt'         => 'Mathematica'           ,
 | |
|             'mo'          => 'Modelica'              ,
 | |
|             'mustache'    => 'Mustache'              ,
 | |
|             'wdproj'      => 'MSBuild script'        ,
 | |
|             'csproj'      => 'MSBuild script'        ,
 | |
|             'vcproj'      => 'MSBuild script'        ,
 | |
|             'wixproj'     => 'MSBuild script'        ,
 | |
|             'btproj'      => 'MSBuild script'        ,
 | |
|             'msbuild'     => 'MSBuild script'        ,
 | |
|             'sln'         => 'Visual Studio Solution',
 | |
|             'mps'         => 'MUMPS'                 ,
 | |
|             'mth'         => 'Teamcenter mth'        ,
 | |
|             'n'           => 'Nemerle'               ,
 | |
|             'nlogo'       => 'NetLogo'               ,
 | |
|             'nls'         => 'NetLogo'               ,
 | |
|             'ncl'         => 'Nickel'                ,
 | |
|             'nims'        => 'Nim'                   ,
 | |
|             'nimrod'      => 'Nim'                   ,
 | |
|             'nimble'      => 'Nim'                   ,
 | |
|             'nim.cfg'     => 'Nim'                   ,
 | |
|             'nim'         => 'Nim'                   ,
 | |
|             'nix'         => 'Nix'                   ,
 | |
|             'nut'         => 'Squirrel'              ,
 | |
|             'njk'         => 'Nunjucks'              ,
 | |
|             'odin'        => 'Odin'                  ,
 | |
|             'oscript'     => 'LiveLink OScript'      ,
 | |
|             'bod'         => 'Oracle PL/SQL'         ,
 | |
|             'spc'         => 'Oracle PL/SQL'         ,
 | |
|             'fnc'         => 'Oracle PL/SQL'         ,
 | |
|             'prc'         => 'Oracle PL/SQL'         ,
 | |
|             'trg'         => 'Oracle PL/SQL'         ,
 | |
|             'pad'         => 'Ada'                   , # Oracle Ada preprocessor
 | |
|             'page'        => 'Visualforce Page'      ,
 | |
|             'pas'         => 'Pascal'                ,
 | |
|             'pcc'         => 'C++'                   , # Oracle C++ preprocessor
 | |
|             'rexfile'     => 'Perl'                  ,
 | |
|             'psgi'        => 'Perl'                  ,
 | |
|             'ph'          => 'Perl'                  ,
 | |
|             'makefile.pl' => 'Perl'                  ,
 | |
|             'cpanfile'    => 'Perl'                  ,
 | |
|             'al'          => 'Perl'                  ,
 | |
|             'ack'         => 'Perl'                  ,
 | |
|             'perl'        => 'Perl'                  ,
 | |
|             'pfo'         => 'Fortran 77'            ,
 | |
|             'pgc'         => 'C'                     , # Postgres embedded C/C++
 | |
|             'phpt'        => 'PHP'                   ,
 | |
|             'phps'        => 'PHP'                   ,
 | |
|             'phakefile'   => 'PHP'                   ,
 | |
|             'ctp'         => 'PHP'                   ,
 | |
|             'aw'          => 'PHP'                   ,
 | |
|             'php_cs.dist' => 'PHP'                   ,
 | |
|             'php_cs'      => 'PHP'                   ,
 | |
|             'php3'        => 'PHP'                   ,
 | |
|             'php4'        => 'PHP'                   ,
 | |
|             'php5'        => 'PHP'                   ,
 | |
|             'php'         => 'PHP'                   ,
 | |
|             'phtml'       => 'PHP'                   ,
 | |
|             'pig'         => 'Pig Latin'             ,
 | |
|             'plh'         => 'Perl'                  ,
 | |
|             'pl'          => 'Perl/Prolog'           ,
 | |
|             'PL'          => 'Perl/Prolog'           ,
 | |
|             'p6'          => 'Raku/Prolog'           ,
 | |
|             'P6'          => 'Raku/Prolog'           ,
 | |
|             'plx'         => 'Perl'                  ,
 | |
|             'pm'          => 'Perl'                  ,
 | |
|             'pm6'         => 'Raku'                  ,
 | |
|             'raku'        => 'Raku'                  ,
 | |
|             'rakumod'     => 'Raku'                  ,
 | |
|             'pom.xml'     => 'Maven'                 ,
 | |
|             'pom'         => 'Maven'                 ,
 | |
|             'scad'        => 'OpenSCAD'              ,
 | |
|             'yap'         => 'Prolog'                ,
 | |
|             'prolog'      => 'Prolog'                ,
 | |
|             'P'           => 'Prolog'                ,
 | |
|             'p'           => 'Pascal'                ,
 | |
|             'pp'          => 'Pascal/Puppet'         ,
 | |
|             'viw'         => 'SQL'                   ,
 | |
|             'udf'         => 'SQL'                   ,
 | |
|             'tab'         => 'SQL'                   ,
 | |
|             'mysql'       => 'SQL'                   ,
 | |
|             'cql'         => 'SQL'                   ,
 | |
|             'psql'        => 'SQL'                   ,
 | |
|             'xpy'         => 'Python'                ,
 | |
|             'wsgi'        => 'Python'                ,
 | |
|             'wscript'     => 'Python'                ,
 | |
|             'workspace'   => 'Python'                ,
 | |
|             'tac'         => 'Python'                ,
 | |
|             'snakefile'   => 'Python'                ,
 | |
|             'sconstruct'  => 'Python'                ,
 | |
|             'sconscript'  => 'Python'                ,
 | |
|             'pyt'         => 'Python'                ,
 | |
|             'pyp'         => 'Python'                ,
 | |
|             'pyi'         => 'Python'                ,
 | |
|             'pyde'        => 'Python'                ,
 | |
|             'py3'         => 'Python'                ,
 | |
|             'lmi'         => 'Python'                ,
 | |
|             'gypi'        => 'Python'                ,
 | |
|             'gyp'         => 'Python'                ,
 | |
|             'build.bazel' => 'Python'                ,
 | |
|             'buck'        => 'Python'                ,
 | |
|             'gclient'     => 'Python'                ,
 | |
|             'py'          => 'Python'                ,
 | |
|             'pyw'         => 'Python'                ,
 | |
|             'ipynb'       => 'Jupyter Notebook'      ,
 | |
|             'pyj'         => 'RapydScript'           ,
 | |
|             'pxi'         => 'Cython'                ,
 | |
|             'pxd'         => 'Cython'                ,
 | |
|             'pyx'         => 'Cython'                ,
 | |
|             'qbs'         => 'QML'                   ,
 | |
|             'qml'         => 'QML'                   ,
 | |
|             'watchr'      => 'Ruby'                  ,
 | |
|             'vagrantfile' => 'Ruby'                  ,
 | |
|             'thorfile'    => 'Ruby'                  ,
 | |
|             'thor'        => 'Ruby'                  ,
 | |
|             'snapfile'    => 'Ruby'                  ,
 | |
|             'ru'          => 'Ruby'                  ,
 | |
|             'rbx'         => 'Ruby'                  ,
 | |
|             'rbw'         => 'Ruby'                  ,
 | |
|             'rbuild'      => 'Ruby'                  ,
 | |
|             'rabl'        => 'Ruby'                  ,
 | |
|             'puppetfile'  => 'Ruby'                  ,
 | |
|             'podfile'     => 'Ruby'                  ,
 | |
|             'mspec'       => 'Ruby'                  ,
 | |
|             'mavenfile'   => 'Ruby'                  ,
 | |
|             'jbuilder'    => 'Ruby'                  ,
 | |
|             'jarfile'     => 'Ruby'                  ,
 | |
|             'guardfile'   => 'Ruby'                  ,
 | |
|             'god'         => 'Ruby'                  ,
 | |
|             'gemspec'     => 'Ruby'                  ,
 | |
|             'gemfile.lock'=> 'Ruby'                  ,
 | |
|             'gemfile'     => 'Ruby'                  ,
 | |
|             'fastfile'    => 'Ruby'                  ,
 | |
|             'eye'         => 'Ruby'                  ,
 | |
|             'deliverfile' => 'Ruby'                  ,
 | |
|             'dangerfile'  => 'Ruby'                  ,
 | |
|             'capfile'     => 'Ruby'                  ,
 | |
|             'buildfile'   => 'Ruby'                  ,
 | |
|             'builder'     => 'Ruby'                  ,
 | |
|             'brewfile'    => 'Ruby'                  ,
 | |
|             'berksfile'   => 'Ruby'                  ,
 | |
|             'appraisals'  => 'Ruby'                  ,
 | |
|             'pryrc'       => 'Ruby'                  ,
 | |
|             'irbrc'       => 'Ruby'                  ,
 | |
|             'rb'          => 'Ruby'                  ,
 | |
|             'podspec'     => 'Ruby'                  ,
 | |
|             'rake'        => 'Ruby'                  ,
 | |
|          #  'resx'        => 'ASP.NET'               ,
 | |
|             'rex'         => 'Oracle Reports'        ,
 | |
|             'pprx'        => 'Rexx'                  ,
 | |
|             'rexx'        => 'Rexx'                  ,
 | |
|             'rhtml'       => 'Ruby HTML'             ,
 | |
|             'circom'      => 'Circom'                ,
 | |
|             'cairo'       => 'Cairo'                 ,
 | |
|             'rs.in'       => 'Rust'                  ,
 | |
|             'rs'          => 'Rust'                  ,
 | |
|             'rst.txt'     => 'reStructuredText'      ,
 | |
|             'rest.txt'    => 'reStructuredText'      ,
 | |
|             'rest'        => 'reStructuredText'      ,
 | |
|             'rst'         => 'reStructuredText'      ,
 | |
|             's'           => 'Assembly'              ,
 | |
|             'S'           => 'Assembly'              ,
 | |
|             'SCA'         => 'Visual Fox Pro'        ,
 | |
|             'sca'         => 'Visual Fox Pro'        ,
 | |
|             'sbt'         => 'Scala'                 ,
 | |
|             'kojo'        => 'Scala'                 ,
 | |
|             'scala'       => 'Scala'                 ,
 | |
|             'sbl'         => 'Softbridge Basic'      ,
 | |
|             'SBL'         => 'Softbridge Basic'      ,
 | |
|             'sed'         => 'sed'                   ,
 | |
|             'sp'          => 'SparForte'             ,
 | |
|             'sol'         => 'Solidity'              ,
 | |
|             'p4'          => 'P4'                    ,
 | |
|             'ses'         => 'Patran Command Language'   ,
 | |
|             'pcl'         => 'Patran Command Language'   ,
 | |
|             'peg'         => 'PEG'                   ,
 | |
|             'pegjs'       => 'peg.js'                ,
 | |
|             'peggy'       => 'peggy'                 ,
 | |
|             'pest'        => 'Pest'                  ,
 | |
|             'prisma'      => 'Prisma Schema'         ,
 | |
|             'tspeg'       => 'tspeg'                 ,
 | |
|             'jspeg'       => 'tspeg'                 ,
 | |
|             'pl1'         => 'PL/I'                  ,
 | |
|             'plm'         => 'PL/M'                  ,
 | |
|             'lit'         => 'PL/M'                  ,
 | |
|             'iuml'        => 'PlantUML'              ,
 | |
|             'pu'          => 'PlantUML'              ,
 | |
|             'puml'        => 'PlantUML'              ,
 | |
|             'plantuml'    => 'PlantUML'              ,
 | |
|             'wsd'         => 'PlantUML'              ,
 | |
|             'properties'  => 'Properties'            ,
 | |
|             'po'          => 'PO File'               ,
 | |
|             'pony'        => 'Pony'                  ,
 | |
|             'pbt'         => 'PowerBuilder'          ,
 | |
|             'sra'         => 'PowerBuilder'          ,
 | |
|             'srf'         => 'PowerBuilder'          ,
 | |
|             'srm'         => 'PowerBuilder'          ,
 | |
|             'srs'         => 'PowerBuilder'          ,
 | |
|             'sru'         => 'PowerBuilder'          ,
 | |
|             'srw'         => 'PowerBuilder'          ,
 | |
|             'jade'        => 'Pug'                   ,
 | |
|             'pug'         => 'Pug'                   ,
 | |
|             'purs'        => 'PureScript'            ,
 | |
|             'prefab'      => 'Unity-Prefab'          ,
 | |
|             'proto'       => 'Protocol Buffers'      ,
 | |
|             'mat'         => 'Unity-Prefab'          ,
 | |
|             'ps1'         => 'PowerShell'            ,
 | |
|             'psd1'        => 'PowerShell'            ,
 | |
|             'psm1'        => 'PowerShell'            ,
 | |
|             'prql'        => 'PRQL'                  ,
 | |
|             'rsx'         => 'R'                     ,
 | |
|             'rd'          => 'R'                     ,
 | |
|             'expr-dist'   => 'R'                     ,
 | |
|             'rprofile'    => 'R'                     ,
 | |
|             'R'           => 'R'                     ,
 | |
|             'r'           => 'R'                     ,
 | |
|             'raml'        => 'RAML'                  ,
 | |
|             'ring'        => 'Ring'                  ,
 | |
|             'rh'          => 'Ring'                  ,
 | |
|             'rform'       => 'Ring'                  ,
 | |
|             'rktd'        => 'Racket'                ,
 | |
|             'rkt'         => 'Racket'                ,
 | |
|             'rktl'        => 'Racket'                ,
 | |
|             'Rmd'         => 'Rmd'                   ,
 | |
|             're'          => 'ReasonML'              ,
 | |
|             'rei'         => 'ReasonML'              ,
 | |
|             'res'         => 'ReScript'              ,
 | |
|             'resi'        => 'ReScript'              ,
 | |
|             'scrbl'       => 'Racket'                ,
 | |
|             'sps'         => 'Scheme'                ,
 | |
|             'sc'          => 'Scheme'                ,
 | |
|             'ss'          => 'Scheme'                ,
 | |
|             'scm'         => 'Scheme'                ,
 | |
|             'sch'         => 'Scheme'                ,
 | |
|             'sls'         => 'Scheme/SaltStack'      ,
 | |
|             'sld'         => 'Scheme'                ,
 | |
|             'robot'       => 'RobotFramework'        ,
 | |
|             'rc'          => 'Windows Resource File' ,
 | |
|             'rc2'         => 'Windows Resource File' ,
 | |
|             'sas'         => 'SAS'                   ,
 | |
|             'sass'        => 'Sass'                  ,
 | |
|             'scss'        => 'SCSS'                  ,
 | |
|             'sh'          => 'Bourne Shell'          ,
 | |
|             'smarty'      => 'Smarty'                ,
 | |
|             'sml'         => 'Standard ML'           ,
 | |
|             'sig'         => 'Standard ML'           ,
 | |
|             'fun'         => 'Standard ML'           ,
 | |
|             'slim'        => 'Slim'                  ,
 | |
|             'e'           => 'Specman e'             ,
 | |
|             'sql'         => 'SQL'                   ,
 | |
|             'SQL'         => 'SQL'                   ,
 | |
|             'sproc.sql'   => 'SQL Stored Procedure'  ,
 | |
|             'spoc.sql'    => 'SQL Stored Procedure'  ,
 | |
|             'spc.sql'     => 'SQL Stored Procedure'  ,
 | |
|             'udf.sql'     => 'SQL Stored Procedure'  ,
 | |
|             'data.sql'    => 'SQL Data'              ,
 | |
|             'sss'         => 'SugarSS'               ,
 | |
|             'slint'       => 'Slint'                 ,
 | |
|             'st'          => 'Smalltalk'             ,
 | |
|             'rules'       => 'Snakemake'             ,
 | |
|             'smk'         => 'Snakemake'             ,
 | |
|             'styl'        => 'Stylus'                ,
 | |
|             'i'           => 'SWIG'                  ,
 | |
|             'svelte'      => 'Svelte'                ,
 | |
|             'sv'          => 'Verilog-SystemVerilog' ,
 | |
|             'svh'         => 'Verilog-SystemVerilog' ,
 | |
|             'svg'         => 'SVG'                   ,
 | |
|             'SVG'         => 'SVG'                   ,
 | |
|             'v'           => 'Verilog-SystemVerilog/Coq' ,
 | |
|             'td'          => 'TableGen'              ,
 | |
|             'tcl'         => 'Tcl/Tk'                ,
 | |
|             'tcsh'        => 'C Shell'               ,
 | |
|             'tk'          => 'Tcl/Tk'                ,
 | |
|             'teal'        => 'TEAL'                  ,
 | |
|             'mkvi'        => 'TeX'                   ,
 | |
|             'mkiv'        => 'TeX'                   ,
 | |
|             'mkii'        => 'TeX'                   ,
 | |
|             'ltx'         => 'TeX'                   ,
 | |
|             'lbx'         => 'TeX'                   ,
 | |
|             'ins'         => 'TeX'                   ,
 | |
|             'cbx'         => 'TeX'                   ,
 | |
|             'bib'         => 'TeX'                   ,
 | |
|             'bbx'         => 'TeX'                   ,
 | |
|             'aux'         => 'TeX'                   ,
 | |
|             'tex'         => 'TeX'                   , # TeX, LaTex, MikTex, ..
 | |
|             'toml'        => 'TOML'                  ,
 | |
|             'sty'         => 'TeX'                   ,
 | |
| #           'cls'         => 'TeX'                   ,
 | |
|             'dtx'         => 'TeX'                   ,
 | |
|             'bst'         => 'TeX'                   ,
 | |
|             'txt'         => 'Text'                  ,
 | |
|             'text'        => 'Text'                  ,
 | |
|             'tres'        => 'Godot Resource'        ,
 | |
|             'tscn'        => 'Godot Scene'           ,
 | |
|             'thrift'      => 'Thrift'                ,
 | |
|             'tla'         => 'TLA+'                  ,
 | |
|             'tpl'         => 'Smarty'                ,
 | |
|             'trigger'     => 'Apex Trigger'          ,
 | |
|             'ttcn'        => 'TTCN'                  ,
 | |
|             'ttcn2'       => 'TTCN'                  ,
 | |
|             'ttcn3'       => 'TTCN'                  ,
 | |
|             'ttcnpp'      => 'TTCN'                  ,
 | |
|             'sdl'         => 'TNSDL'                 ,
 | |
|             'ssc'         => 'TNSDL'                 ,
 | |
|             'sdt'         => 'TNSDL'                 ,
 | |
|             'spd'         => 'TNSDL'                 ,
 | |
|             'sst'         => 'TNSDL'                 ,
 | |
|             'rou'         => 'TNSDL'                 ,
 | |
|             'cin'         => 'TNSDL'                 ,
 | |
|             'cii'         => 'TNSDL'                 ,
 | |
|             'interface'   => 'TNSDL'                 ,
 | |
|             'in1'         => 'TNSDL'                 ,
 | |
|             'in2'         => 'TNSDL'                 ,
 | |
|             'in3'         => 'TNSDL'                 ,
 | |
|             'in4'         => 'TNSDL'                 ,
 | |
|             'inf'         => 'TNSDL'                 ,
 | |
|             'tpd'         => 'TITAN Project File Information',
 | |
|             'ts'          => 'TypeScript/Qt Linguist',
 | |
|             'mts'         => 'TypeScript'            ,
 | |
|             'tsx'         => 'TypeScript'            ,
 | |
|             'tss'         => 'Titanium Style Sheet'  ,
 | |
|             'twig'        => 'Twig'                  ,
 | |
|             'typ'         => 'Typst'                 ,
 | |
|             'um'          => 'Umka'                  ,
 | |
|             'ui'          => 'Qt/Glade'              ,
 | |
|             'glade'       => 'Glade'                 ,
 | |
|             'vala'        => 'Vala'                  ,
 | |
|             'vapi'        => 'Vala Header'           ,
 | |
|             'vhw'         => 'VHDL'                  ,
 | |
|             'vht'         => 'VHDL'                  ,
 | |
|             'vhs'         => 'VHDL'                  ,
 | |
|             'vho'         => 'VHDL'                  ,
 | |
|             'vhi'         => 'VHDL'                  ,
 | |
|             'vhf'         => 'VHDL'                  ,
 | |
|             'vhd'         => 'VHDL'                  ,
 | |
|             'VHD'         => 'VHDL'                  ,
 | |
|             'vhdl'        => 'VHDL'                  ,
 | |
|             'VHDL'        => 'VHDL'                  ,
 | |
|             'bas'         => 'Visual Basic'          ,
 | |
|             'BAS'         => 'Visual Basic'          ,
 | |
|             'ctl'         => 'Visual Basic'          ,
 | |
|             'dsr'         => 'Visual Basic'          ,
 | |
|             'frm'         => 'Visual Basic'          ,
 | |
|             'frx'         => 'Visual Basic'          ,
 | |
|             'FRX'         => 'Visual Basic'          ,
 | |
|             'vba'         => 'VB for Applications'   ,
 | |
|             'VBA'         => 'VB for Applications'   ,
 | |
|             'vbhtml'      => 'Visual Basic'          ,
 | |
|             'VBHTML'      => 'Visual Basic'          ,
 | |
|             'vbproj'      => 'Visual Basic .NET'     ,
 | |
|             'vbp'         => 'Visual Basic'          , # .vbp - autogenerated
 | |
|             'vbs'         => 'Visual Basic Script'   ,
 | |
|             'VBS'         => 'Visual Basic Script'   ,
 | |
|             'vb'          => 'Visual Basic .NET'     ,
 | |
|             'VB'          => 'Visual Basic .NET'     ,
 | |
|             'vbw'         => 'Visual Basic'          , # .vbw - autogenerated
 | |
|             'vue'         => 'Vuejs Component'       ,
 | |
|             'vy'          => 'Vyper'                 ,
 | |
|             'webinfo'     => 'ASP.NET'               ,
 | |
|             'wsdl'        => 'Web Services Description',
 | |
|             'x'           => 'Logos'                 ,
 | |
|             'xm'          => 'Logos'                 ,
 | |
|             'xpo'         => 'X++'                   , # Microsoft Dynamics AX 4.0 export format
 | |
|             'xmi'         => 'XMI'                   ,
 | |
|             'XMI'         => 'XMI'                   ,
 | |
|             'zcml'        => 'XML'                   ,
 | |
|             'xul'         => 'XML'                   ,
 | |
|             'xspec'       => 'XML'                   ,
 | |
|             'xproj'       => 'XML'                   ,
 | |
|             'xml.dist'    => 'XML'                   ,
 | |
|             'xliff'       => 'XML'                   ,
 | |
|             'xlf'         => 'XML'                   ,
 | |
|             'xib'         => 'XML'                   ,
 | |
|             'xacro'       => 'XML'                   ,
 | |
|             'x3d'         => 'XML'                   ,
 | |
|             'wsf'         => 'XML'                   ,
 | |
|             'web.release.config'=> 'XML'             ,
 | |
|             'web.debug.config'=> 'XML'               ,
 | |
|             'web.config'  => 'XML'                   ,
 | |
|             'wxml'        => 'WXML'                  ,
 | |
|             'wxss'        => 'WXSS'                  ,
 | |
|             'vxml'        => 'XML'                   ,
 | |
|             'vstemplate'  => 'XML'                   ,
 | |
|             'vssettings'  => 'XML'                   ,
 | |
|             'vsixmanifest'=> 'XML'                   ,
 | |
|             'vcxproj'     => 'XML'                   ,
 | |
|             'ux'          => 'XML'                   ,
 | |
|             'urdf'        => 'XML'                   ,
 | |
|             'tmtheme'     => 'XML'                   ,
 | |
|             'tmsnippet'   => 'XML'                   ,
 | |
|             'tmpreferences'=> 'XML'                  ,
 | |
|             'tmlanguage'  => 'XML'                   ,
 | |
|             'tml'         => 'XML'                   ,
 | |
|             'tmcommand'   => 'XML'                   ,
 | |
|             'targets'     => 'XML'                   ,
 | |
|             'sublime-snippet'=> 'XML'                   ,
 | |
|             'sttheme'     => 'XML'                   ,
 | |
|             'storyboard'  => 'XML'                   ,
 | |
|             'srdf'        => 'XML'                   ,
 | |
|             'shproj'      => 'XML'                   ,
 | |
|             'sfproj'      => 'XML'                   ,
 | |
|             'settings.stylecop'=> 'XML'                   ,
 | |
|             'scxml'       => 'XML'                   ,
 | |
|             'rss'         => 'XML'                   ,
 | |
|             'resx'        => 'XML'                   ,
 | |
|             'rdf'         => 'XML'                   ,
 | |
|             'pt'          => 'XML'                   ,
 | |
|             'psc1'        => 'XML'                   ,
 | |
|             'ps1xml'      => 'XML'                   ,
 | |
|             'props'       => 'XML'                   ,
 | |
|             'proj'        => 'XML'                   ,
 | |
|             'plist'       => 'XML'                   ,
 | |
|             'pkgproj'     => 'XML'                   ,
 | |
|             'packages.config'=> 'XML'                   ,
 | |
|             'osm'         => 'XML'                   ,
 | |
|             'odd'         => 'XML'                   ,
 | |
|             'nuspec'      => 'XML'                   ,
 | |
|             'nuget.config'=> 'XML'                   ,
 | |
|             'nproj'       => 'XML'                   ,
 | |
|             'ndproj'      => 'XML'                   ,
 | |
|             'natvis'      => 'XML'                   ,
 | |
|             'mjml'        => 'XML'                   ,
 | |
|             'mdpolicy'    => 'XML'                   ,
 | |
|             'launch'      => 'XML'                   ,
 | |
|             'kml'         => 'XML'                   ,
 | |
|             'jsproj'      => 'XML'                   ,
 | |
|             'jelly'       => 'XML'                   ,
 | |
|             'ivy'         => 'XML'                   ,
 | |
|             'iml'         => 'XML'                   ,
 | |
|             'grxml'       => 'XML'                   ,
 | |
|             'gmx'         => 'XML'                   ,
 | |
|             'fsproj'      => 'XML'                   ,
 | |
|             'filters'     => 'XML'                   ,
 | |
|             'dotsettings' => 'XML'                   ,
 | |
|             'dll.config'  => 'XML'                   ,
 | |
|             'ditaval'     => 'XML'                   ,
 | |
|             'ditamap'     => 'XML'                   ,
 | |
|             'depproj'     => 'XML'                   ,
 | |
|             'ct'          => 'XML'                   ,
 | |
|             'csl'         => 'XML'                   ,
 | |
|             'csdef'       => 'XML'                   ,
 | |
|             'cscfg'       => 'XML'                   ,
 | |
|             'cproject'    => 'XML'                   ,
 | |
|             'clixml'      => 'XML'                   ,
 | |
|             'ccxml'       => 'XML'                   ,
 | |
|             'ccproj'      => 'XML'                   ,
 | |
|             'builds'      => 'XML'                   ,
 | |
|             'axml'        => 'XML'                   ,
 | |
|             'app.config'  => 'XML'                   ,
 | |
|             'ant'         => 'XML'                   ,
 | |
|             'admx'        => 'XML'                   ,
 | |
|             'adml'        => 'XML'                   ,
 | |
|             'project'     => 'XML'                   ,
 | |
|             'classpath'   => 'XML'                   ,
 | |
|             'xml'         => 'XML'                   ,
 | |
|             'XML'         => 'XML'                   ,
 | |
|             'mxml'        => 'MXML'                  ,
 | |
|             'xml.builder' => 'builder'               ,
 | |
|             'build'       => 'NAnt script'           ,
 | |
|             'vim'         => 'vim script'            ,
 | |
|             'swift'       => 'Swift'                 ,
 | |
|             'xaml'        => 'XAML'                  ,
 | |
|             'wast'        => 'WebAssembly'           ,
 | |
|             'wat'         => 'WebAssembly'           ,
 | |
|             'wgsl'        => 'WGSL'                  ,
 | |
|             'wxs'         => 'WiX source'            ,
 | |
|             'wxi'         => 'WiX include'           ,
 | |
|             'wxl'         => 'WiX string localization' ,
 | |
|             'prw'         => 'xBase'                 ,
 | |
|             'prg'         => 'xBase'                 ,
 | |
|             'ch'          => 'xBase Header'          ,
 | |
|             'xqy'         => 'XQuery'                ,
 | |
|             'xqm'         => 'XQuery'                ,
 | |
|             'xql'         => 'XQuery'                ,
 | |
|             'xq'          => 'XQuery'                ,
 | |
|             'xquery'      => 'XQuery'                ,
 | |
|             'xsd'         => 'XSD'                   ,
 | |
|             'XSD'         => 'XSD'                   ,
 | |
|             'xslt'        => 'XSLT'                  ,
 | |
|             'XSLT'        => 'XSLT'                  ,
 | |
|             'xsl'         => 'XSLT'                  ,
 | |
|             'XSL'         => 'XSLT'                  ,
 | |
|             'xtend'       => 'Xtend'                 ,
 | |
|             'yacc'        => 'yacc'                  ,
 | |
|             'y'           => 'yacc'                  ,
 | |
|             'yml.mysql'   => 'YAML'                  ,
 | |
|             'yaml-tmlanguage'=> 'YAML'                  ,
 | |
|             'syntax'      => 'YAML'                  ,
 | |
|             'sublime-syntax'=> 'YAML'                  ,
 | |
|             'rviz'        => 'YAML'                  ,
 | |
|             'reek'        => 'YAML'                  ,
 | |
|             'mir'         => 'YAML'                  ,
 | |
|             'glide.lock'  => 'YAML'                  ,
 | |
|             'gemrc'       => 'YAML'                  ,
 | |
|             'clang-tidy'  => 'YAML'                  ,
 | |
|             'clang-format'=> 'YAML'                  ,
 | |
|             'yaml'        => 'YAML'                  ,
 | |
|             'yml'         => 'YAML'                  ,
 | |
|             'zig'         => 'Zig'                   ,
 | |
|             'zsh'         => 'zsh'                   ,
 | |
|             );
 | |
| # 1}}}
 | |
| %{$rh_Language_by_Script}    = (             # {{{1
 | |
|             'awk'      => 'awk'                   ,
 | |
|             'bash'     => 'Bourne Again Shell'    ,
 | |
|             'bc'       => 'bc'                    ,# calculator
 | |
|             'crystal'  => 'Crystal'               ,
 | |
|             'csh'      => 'C Shell'               ,
 | |
|             'dmd'      => 'D'                     ,
 | |
|             'dtrace'   => 'dtrace'                ,
 | |
|             'escript'  => 'Erlang'                ,
 | |
|             'groovy'   => 'Groovy'                ,
 | |
|             'idl'      => 'IDL'                   ,
 | |
|             'kermit'   => 'Kermit'                ,
 | |
|             'ksh'      => 'Korn Shell'            ,
 | |
|             'lua'      => 'Lua'                   ,
 | |
|             'luau'     => 'Luau'                  ,
 | |
|             'make'     => 'make'                  ,
 | |
|             'octave'   => 'Octave'                ,
 | |
|             'perl5'    => 'Perl'                  ,
 | |
|             'perl'     => 'Perl'                  ,
 | |
|             'miniperl' => 'Perl'                  ,
 | |
|             'php'      => 'PHP'                   ,
 | |
|             'php5'     => 'PHP'                   ,
 | |
|             'python'   => 'Python'                ,
 | |
|             'python2.6'=> 'Python'                ,
 | |
|             'python2.7'=> 'Python'                ,
 | |
|             'python3'  => 'Python'                ,
 | |
|             'python3.3'=> 'Python'                ,
 | |
|             'python3.4'=> 'Python'                ,
 | |
|             'python3.5'=> 'Python'                ,
 | |
|             'python3.6'=> 'Python'                ,
 | |
|             'python3.7'=> 'Python'                ,
 | |
|             'python3.8'=> 'Python'                ,
 | |
|             'perl6'    => 'Raku'                  ,
 | |
|             'raku'     => 'Raku'                  ,
 | |
|             'rakudo'   => 'Raku'                  ,
 | |
|             'rexx'     => 'Rexx'                  ,
 | |
|             'regina'   => 'Rexx'                  ,
 | |
|             'ruby'     => 'Ruby'                  ,
 | |
|             'sed'      => 'sed'                   ,
 | |
|             'sh'       => 'Bourne Shell'          ,
 | |
|             'swipl'    => 'Prolog'                ,
 | |
|             'tcl'      => 'Tcl/Tk'                ,
 | |
|             'tclsh'    => 'Tcl/Tk'                ,
 | |
|             'tcsh'     => 'C Shell'               ,
 | |
|             'wish'     => 'Tcl/Tk'                ,
 | |
|             'zsh'      => 'zsh'                   ,
 | |
|             );
 | |
| # 1}}}
 | |
| %{$rh_Language_by_File}      = (             # {{{1
 | |
|             'build.xml'         => 'Ant/XML'            ,
 | |
|             'BUILD'             => 'Bazel'              ,
 | |
|             'WORKSPACE'         => 'Bazel'              ,
 | |
|             'cmakelists.txt'    => 'CMake'              ,
 | |
|             'CMakeLists.txt'    => 'CMake'              ,
 | |
|             'Jamfile'           => 'Jam'                ,
 | |
|             'jamfile'           => 'Jam'                ,
 | |
|             'Jamrules'          => 'Jam'                ,
 | |
|             'Makefile'          => 'make'               ,
 | |
|             'makefile'          => 'make'               ,
 | |
|             'meson.build'       => 'Meson'              ,
 | |
|             'Gnumakefile'       => 'make'               ,
 | |
|             'gnumakefile'       => 'make'               ,
 | |
|             'pom.xml'           => 'Maven/XML'          ,
 | |
|             'Rakefile'          => 'Ruby'               ,
 | |
|             'rakefile'          => 'Ruby'               ,
 | |
|             'Snakefile'         => 'Snakemake'          ,
 | |
|             'Dockerfile'        => 'Dockerfile'         ,
 | |
|             'Dockerfile.m4'     => 'Dockerfile'         ,
 | |
|             'Dockerfile.cmake'  => 'Dockerfile'         ,
 | |
|             'dockerfile'        => 'Dockerfile'         ,
 | |
|             'dockerfile.m4'     => 'Dockerfile'         ,
 | |
|             'dockerfile.cmake'  => 'Dockerfile'         ,
 | |
|             'Containerfile'     => 'Containerfile'      ,
 | |
|             );
 | |
| # 1}}}
 | |
| %{$rh_Language_by_Prefix}     = (             # {{{1
 | |
|             'Dockerfile'        => 'Dockerfile'         ,
 | |
|             'Containerfile'     => 'Containerfile'         ,
 | |
|             );
 | |
| # 1}}}
 | |
| %{$rhaa_Filters_by_Language} = (            # {{{1
 | |
|     '(unknown)'          => [ ],
 | |
|     'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
 | |
|     'ActionScript'       => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Apex Class'         => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'AppleScript'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '(*', '*)' ],
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'ASP'                => [   [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
 | |
|     'ASP.NET'            => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_between_general', '<%--', '--%>' ],
 | |
|                                 [ 'remove_between_general', '<!--', '-->' ],
 | |
|                             ],
 | |
|     'Ada'                => [   [ 'remove_matches'      , '^\s*--' ], ],
 | |
|     'ADSO/IDSM'          => [   [ 'remove_matches'      , '^\s*\*[\+\!]' ], ],
 | |
|     'Agda'               => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | |
|     'AMPLE'              => [   [ 'remove_matches'      , '^\s*//' ], ],
 | |
|     'APL'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*⍝' ],
 | |
|                             ],
 | |
|     'AnsProlog'          => [
 | |
|                                 [ 'remove_between_general', '%*', '*%' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\%' ],
 | |
|                                 [ 'remove_inline'       , '(//|\%).*$' ],
 | |
|                             ],
 | |
|     'Ant/XML'            => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'ANTLR Grammar'      => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Ant'                => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Apex Trigger'       => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Arduino Sketch'     => [ # Arduino IDE inserts problematic 0xA0 characters; strip them
 | |
|                                 [ 'replace_regex' , '\xa0', " " ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'ArkTs'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Arturo'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'AsciiDoc'           => [
 | |
|                                 [ 'remove_between_general', '////', '////' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\/\/'  ],
 | |
|                             ],
 | |
|     'AspectJ'            => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Assembly'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*\@' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\|' ],
 | |
|                                 [ 'remove_matches'      , '^\s*!'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                                 [ 'remove_inline'       , '\@.*$'  ],
 | |
|                                 [ 'remove_inline'       , '\|.*$'  ],
 | |
|                                 [ 'remove_inline'       , '!.*$'   ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                                 [ 'remove_matches'      , '^\*'    ],  # z/OS Assembly
 | |
|                             ],
 | |
|     'Astro'              => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Asymptote'          => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'AutoHotkey'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'awk'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Bazel'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'bc'                 => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Blade'              => [
 | |
|                                 [ 'remove_between_general', '{{--', '--}}' ],
 | |
|                                 [ 'remove_html_comments',                  ],
 | |
|                             ],
 | |
|     'Bourne Again Shell' => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Bourne Shell'       => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Brainfuck'          => [ # puerile name for a language
 | |
| #                               [ 'call_regexp_common'  , 'Brainfuck' ],  # inaccurate
 | |
|                                 [ 'remove_bf_comments',               ],
 | |
|                             ],
 | |
|     'BrightScript'       => [
 | |
|                                 [ 'remove_matches'      , '^\s*rem', ],
 | |
|                                 [ 'remove_matches'      , '^\s*\'',  ],
 | |
|                             ],
 | |
|     'builder'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*xml_markup.comment!'  ],
 | |
|                             ],
 | |
|     'C'                  => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Chapel'       => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'C++'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'C/C++ Header'       => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Carbon'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Clean'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Clojure'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*#_'  ],
 | |
|                             ],
 | |
|     'ClojureScript'      => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | |
|     'ClojureC'           => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | |
|     'CMake'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Crystal'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Constraint Grammar' => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'CUDA'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Cython'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'C#/Smalltalk' => [ [ 'die' ,  ], ], # never called
 | |
|     'C#'                 => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'C# Designer'        => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'C# Generated'        => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Cake Build Script'  => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'CCS'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'CSS'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'CSV'                => [  # comma separated value files have no comments;
 | |
|                                 [ 'remove_matches'      , '^\s*$'  ],
 | |
|                             ], # included simply to allow diff's
 | |
|     'COBOL'              => [   [ 'remove_cobol_comments',         ], ],
 | |
|     'CoCoA 5'            => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'CoffeeScript'       => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'ColdFusion'         => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'ColdFusion CFScript'=> [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Containerfile'      => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Coq'                => [
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                             ],
 | |
|     'Crystal Reports'    => [   [ 'remove_matches'      , '^\s*//' ], ],
 | |
|     'CSON'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Cucumber'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                             ],
 | |
|     'D/dtrace'           => [ [ 'die' ,          ], ], # never called
 | |
|     'D'                  => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/+', '+/' ],
 | |
|                                 [ 'remove_between_general', '/+', '+/' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'DAL'                => [
 | |
|                                 [ 'remove_between_general', '[', ']', ],
 | |
|                             ],
 | |
|     'Dart'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'DenizenScript'      => [ # same as YAML
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Derw'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_between_general', '{-', '-}' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'Dafny'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'dhall'              => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | |
|     'Delphi Form'        => [ # same as Pascal
 | |
|                                 [ 'remove_between_regex', '\{[^$]', '}' ],
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
|     'DIET'               => [  # same as Pug
 | |
|                                 [ 'remove_pug_block'    ,          ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     # diff is kind of weird: anything but a space in the first column
 | |
|     # will count as code, with the exception of #, ---, +++.  Spaces
 | |
|     # in the first column denote context lines which aren't part of the
 | |
|     # difference.
 | |
|     'diff'               => [
 | |
|                                 [ 'remove_matches'      , '^#' ],
 | |
|                                 [ 'remove_matches'      , '^\-\-\-' ],
 | |
|                                 [ 'remove_matches'      , '^\+\+\+' ],
 | |
|                                 [ 'remove_matches'      , '^\s' ],
 | |
|                             ],
 | |
|     'DITA'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'DOORS Extension Language' => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Drools'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'dtrace'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'ECPP'               => [
 | |
|                                 [ 'remove_between_general',
 | |
|                                   '<%doc>', '</%doc>',             ],
 | |
|                                 [ 'remove_between_general',
 | |
|                                   '<#'    , '#>'     ,             ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'EEx'                => [
 | |
|                                 [ 'remove_between_general', '<%#', '%>' ],
 | |
|                             ],
 | |
|     'EJS'                => [
 | |
|                                 [ 'remove_between_general', '<%#', '%>' ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                             ],
 | |
|     'Elm'                => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | |
|     'Embedded Crystal'   => [
 | |
|                                 [ 'remove_between_general', '<%#', '%>' ],
 | |
|                             ],
 | |
|     'ERB'                => [
 | |
|                                 [ 'remove_between_general', '<%#', '%>' ],
 | |
|                             ],
 | |
|     'Gencat NLS'         => [   [ 'remove_matches'       , '^\$ .*$' ], ],
 | |
|     'NASTRAN DMAP'       => [
 | |
|                                 [ 'remove_matches'      , '^\s*\$' ],
 | |
|                                 [ 'remove_inline'       , '\$.*$'  ],
 | |
|                             ],
 | |
|     'Dockerfile'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'DOS Batch'          => [
 | |
|                                 [ 'remove_matches'      , '^\s*rem' ],
 | |
|                                 [ 'remove_matches'      , '^\s*::'  ],
 | |
|                             ],
 | |
|     'DTD'                => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'Elixir'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'elixir_doc_to_C'                ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Erlang'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*%'  ],
 | |
|                                 [ 'remove_inline'       , '%.*$'   ],
 | |
|                             ],
 | |
|     'Expect'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Fennel'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'Finite State Language' => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Fish Shell'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Focus'              => [   [ 'remove_matches'      , '^\s*\-\*'  ], ],
 | |
|     'Forth'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*\\\\.*$'  ],
 | |
|                                 [ 'Forth_paren_to_C'                 ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'      ],
 | |
|                                 [ 'remove_inline'       , '\\\\.*$'  ],
 | |
|                             ],
 | |
|     'Fortran 77'         => [
 | |
|                                 [ 'remove_f77_comments' ,          ],
 | |
|                                 [ 'remove_inline'       , '\!.*$'  ],
 | |
|                             ],
 | |
|     'Fortran 77/Forth'   => [ [ 'die' ,          ], ], # never called
 | |
|     'F#/Forth'           => [ [ 'die' ,          ], ], # never called
 | |
|     'Fortran 90'         => [
 | |
|                                 [ 'remove_f77_comments' ,          ],
 | |
|                                 [ 'remove_f90_comments' ,          ],
 | |
|                                 [ 'remove_inline'       , '\!.*$'  ],
 | |
|                             ],
 | |
|     'Fortran 95'         => [
 | |
|                                 [ 'remove_f77_comments' ,          ],
 | |
|                                 [ 'remove_f90_comments' ,          ],
 | |
|                                 [ 'remove_inline'       , '\!.*$'  ],
 | |
|                             ],
 | |
|     'Freemarker Template' => [
 | |
|                                 [ 'remove_between_general', '<#--', '-->' ],
 | |
|                             ],
 | |
|     'Futhark'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*--'  ],
 | |
|                             ],
 | |
|     'FXML'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'F#'                 => [
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
|     'F# Script'          => [
 | |
|                                 [ 'call_regexp_common'  , 'Pascal' ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
|     'Flatbuffers'        => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'Godot Scene'        => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                             ],
 | |
|     'Godot Resource'     => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                             ],
 | |
|     'Godot Shaders'      => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'GDScript'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Glade'              => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Gleam'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'Glimmer JavaScript' => [
 | |
|                                 [ 'remove_between_general', '{{!', '}}' ],
 | |
|                                 [ 'remove_between_general', '<!--', '-->' ],
 | |
|                                 [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', "'", '//', '' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Glimmer TypeScript' => [
 | |
|                                 [ 'remove_between_general', '{{!', '}}' ],
 | |
|                                 [ 'remove_between_general', '<!--', '-->' ],
 | |
|                                 [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', "'", '//', '' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'GLSL'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Go'                 => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '["`]', '*/*', '' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'Gradle'             => [ # same as Groovy
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                                 # separate /* inside quoted strings with two
 | |
|                                 # concatenated strings split between / and *
 | |
|                                 [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | |
|                                   '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | |
|                                 [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | |
|                                 [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | |
|                                 [ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
 | |
|                                 [ 'rm_comments_in_strings', "'''", '//', '', 1],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Grails'             => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'remove_jsp_comments' ,          ],
 | |
|                                 [ 'add_newlines'        ,          ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'GraphQL'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Groovy'             => [
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                                 # separate /* inside quoted strings with two
 | |
|                                 # concatenated strings split between / and *
 | |
|                                 [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | |
|                                   '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | |
|                                 [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | |
|                                 [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | |
|                                 [ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
 | |
|                                 [ 'rm_comments_in_strings', "'''", '//', '', 1],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Haml'               => [
 | |
|                                 [ 'remove_haml_block'   ,          ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'remove_matches'      , '^\s*/\s*\S+' ],
 | |
|                                 [ 'remove_matches'      , '^\s*-#\s*\S+' ],
 | |
|                             ],
 | |
|     'Handlebars'         => [
 | |
|                                 [ 'remove_between_general', '{{!--', '--}}' ],
 | |
|                                 [ 'remove_between_general', '{{!', '}}' ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                             ],
 | |
|     'Harbour'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*\&\&' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\*' ],
 | |
|                                 [ 'remove_matches'      , '^\s*NOTE' ],
 | |
|                                 [ 'remove_matches'      , '^\s*note' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Note' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                                 [ 'remove_inline'       , '\&\&.*$' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Hare'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'remove_matches'      , '//.*$' ],
 | |
|                             ],
 | |
|     'Haxe'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'HCL'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'HLSL'               => [
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'HTML'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'HTML EEx'           => [
 | |
|                                 [ 'remove_matches'       , '^\s*<% #' ],
 | |
|                                 [ 'remove_between_general', '<%!--', '--%>' ],
 | |
|                             ],
 | |
|     'HolyC'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Hoon'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*:[:><]' ],
 | |
|                                 [ 'remove_inline'       , ':[:><].*$'  ],
 | |
|                             ],
 | |
|     'INI'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                             ],
 | |
|     'XHTML'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Haskell'            => [   [ 'remove_haskell_comments', '>filename<' ], ],
 | |
|     'IDL'                => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | |
|     'IDL/Qt Project/Prolog/ProGuard' => [ [ 'die' ,          ], ], # never called
 | |
|     'Idris'              => [
 | |
|                                 [ 'remove_haskell_comments', '>filename<' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\|{3}' ],
 | |
|                             ],
 | |
|     'Igor Pro'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'Literate Idris'     => [
 | |
|                                 [ 'remove_matches'      , '^[^>]'  ],
 | |
|                             ],
 | |
|     'Imba'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#\s'],
 | |
|                                 [ 'remove_inline'       , '#\s.*$' ],
 | |
|                                 [ 'remove_between_regex', '###', '###' ],
 | |
|                             ],
 | |
|     'InstallShield'      => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'IPL'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Jai'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Jam'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Janet'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'JSP'                => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'remove_jsp_comments' ,          ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'add_newlines'        ,          ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'JavaServer Faces'   => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Java'               => [
 | |
| 								[ 'docstring_rm_comments', ],
 | |
|                                 [ 'replace_regex', '\\\\$', ' '],
 | |
|                                 # Java seems to have more path globs in strings
 | |
|                                 # than other languages.  The variations makes
 | |
|                                 # it tricky to craft a universal fix.
 | |
|                                 # \1 is a backreference to the first group, meaning
 | |
|                                 # either single or double quote
 | |
|                                 [ 'replace_between_regex', '(["\'])(.*?/\*)(.*?)\1',
 | |
|                                   '(.*?)' , '"xx"'],
 | |
|                                 [ 'replace_between_regex', '(["\'])(.*?\*/)(.*?)\1',
 | |
|                                   '(.*?)' , '"xx"'],
 | |
|                                ## separate /* inside quoted strings with two
 | |
|                                ## concatenated strings split between / and *
 | |
|                                ##    -> defeated by "xx/**/*_xx" issue 365
 | |
|                                #[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | |
|                                #  '(.*?)' , '"$1$2$1 + $1$3$1$4"'],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'JavaScript'         => [
 | |
|                                 [ 'rm_comments_in_strings', "'", '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', "'", '//', '' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Jinja Template'     => [
 | |
|                                 [ 'remove_between_general', '{#', '#}' ],
 | |
|                             ],
 | |
|     'JSX'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'JCL'                => [   [ 'remove_jcl_comments' ,          ], ],
 | |
|     'JSON'               => [   # ECMA-404, the JSON standard definition
 | |
|                                 # makes no provision for JSON comments
 | |
|                                 # so just use a placeholder filter
 | |
|                                 [ 'remove_matches'      , '^\s*$'  ],
 | |
|                             ],
 | |
|     'JSON5'              => [   # same as JavaScript
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Julia'              => [
 | |
|                                 [ 'remove_between_general', '#=', '=#' ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Juniper Junos'      => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'kvlang'             => [
 | |
|                                 ["remove_matches", '^\s*#[^:]'],
 | |
|                             ],
 | |
|     'Kotlin'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
 | |
|                                 [ 'rm_comments_in_strings', '"""', '//', '', 1],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Lean'               => [
 | |
|                                 [ 'remove_between_general', '/-', '-/' ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'Lem'                => [
 | |
|                                 [ 'remove_OCaml_comments',         ],
 | |
|                             ],
 | |
|     'LESS'               => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'LFE'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_between_general', '#|', '|#' ],
 | |
|                             ],
 | |
|     'Linker Script'      => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/'],
 | |
|                                 [ 'call_regexp_common',     'C'            ],
 | |
|                             ],
 | |
|     'liquid'             => [
 | |
|                                 [ 'remove_between_general', '{% comment %}',
 | |
|                                                             '{% endcomment %}' ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                             ],
 | |
|     'Lisp'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_between_general', '#|', '|#' ],
 | |
|                             ],
 | |
|     'Lisp/OpenCL'        => [ [ 'die' ,          ], ], # never called
 | |
|     'Lisp/Julia'         => [ [ 'die' ,          ], ], # never called
 | |
|     'LiveLink OScript'   => [   [ 'remove_matches'      , '^\s*//' ], ],
 | |
|     'LLVM IR'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'Logos'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Logtalk'            => [  # same filters as Prolog
 | |
|                                 [ 'remove_matches'      , '^\s*\%' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '(//|\%).*$' ],
 | |
|                             ],
 | |
|     'Lua'                => [
 | |
|                                 [ 'remove_between_general', '--[=====[', ']=====]' ],
 | |
|                                 [ 'remove_between_general', '--[====[', ']====]' ],
 | |
|                                 [ 'remove_between_general', '--[===[', ']===]' ],
 | |
|                                 [ 'remove_between_general', '--[==[', ']==]' ],
 | |
|                                 [ 'remove_between_general', '--[=[', ']=]' ],
 | |
|                                 [ 'remove_between_general', '--[[', ']]' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\-\-' ],
 | |
|                             ],
 | |
|     'Luau'               => [
 | |
|                                 [ 'remove_between_general', '--[=====[', ']=====]' ],
 | |
|                                 [ 'remove_between_general', '--[====[', ']====]' ],
 | |
|                                 [ 'remove_between_general', '--[===[', ']===]' ],
 | |
|                                 [ 'remove_between_general', '--[==[', ']==]' ],
 | |
|                                 [ 'remove_between_general', '--[=[', ']=]' ],
 | |
|                                 [ 'remove_between_general', '--[[', ']]' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\-\-' ],
 | |
|                             ],
 | |
|     'make'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Meson'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'MATLAB'             => [
 | |
|                                 [ 'remove_between_general', '%{', '%}' ],
 | |
|                                 [ 'remove_matches'      , '^\s*%'  ],
 | |
|                                 [ 'remove_inline'       , '%.*$'   ],
 | |
|                             ],
 | |
|     'Mathematica'        => [
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                             ],
 | |
|     'Maven/XML'          => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Maven'              => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Mercury'            => [
 | |
|                                 [ 'remove_inline'       , '%.*$'   ],
 | |
|                                 [ 'remove_matches'      , '^\s*%'  ],
 | |
|                             ],
 | |
|     'Metal'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Modelica'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Modula3'            => [   [ 'call_regexp_common'  , 'Pascal' ], ],
 | |
|         # Modula 3 comments are (* ... *) so applying the Pascal filter
 | |
|         # which also treats { ... } as a comment is not really correct.
 | |
|     'Mojom'              => [   [ 'call_regexp_common' , 'C++' ], ],
 | |
|     'Mojo'               => [
 | |
|                                 [ 'remove_matches'      , '/\*'    ],
 | |
|                                 [ 'remove_matches'      , '\*/'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Nemerle'            => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Nunjucks'           => [
 | |
|                                 [ 'remove_between_general', '{#', '#}' ],
 | |
|                             ],
 | |
|     'Objective-C'        => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Objective-C++'      => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'OCaml'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '(*', '*)', 1 ],
 | |
|                                 [ 'remove_OCaml_comments',         ],
 | |
|                             ],
 | |
|     'OpenCL'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'PHP/Pascal/Fortran'       => [ [ 'die' ,          ], ], # never called
 | |
|     'Mako'               => [
 | |
|                                 [ 'remove_matches'       , '##.*$'  ],
 | |
|                             ],
 | |
|     'Markdown'           => [
 | |
|                                 [ 'remove_between_general', '<!--', '-->' ],
 | |
|                                 [ 'remove_between_regex',
 | |
|                                   '\[(comment|\/\/)?\]\s*:?\s*(<\s*>|#)?\s*\(.*?', '.*?\)' ],
 | |
|                                 # http://stackoverflow.com/questions/4823468/comments-in-markdown
 | |
|                             ],
 | |
|     'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => [ [ 'die' ,          ], ], # never called
 | |
|     'MUMPS'              => [   [ 'remove_matches'      , '^\s*;'  ], ],
 | |
|     'Mustache'           => [
 | |
|                                 [ 'remove_between_general', '{{!', '}}' ],
 | |
|                             ],
 | |
|     'Nickel'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Nim'                => [
 | |
|                                 [ 'remove_between_general', '#[', ']#' ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
| #                               [ 'docstring_to_C'                 ],
 | |
| #                               [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'NetLogo'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'Nix'                => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Octave'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Odin'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'OpenSCAD'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Oracle Forms'       => [   [ 'call_regexp_common'  , 'C'      ], ],
 | |
|     'Oracle Reports'     => [   [ 'call_regexp_common'  , 'C'      ], ],
 | |
|     'Oracle PL/SQL'      => [
 | |
|                                 [ 'call_regexp_common'  , 'PL/SQL'      ],
 | |
|                             ],
 | |
|     'Pascal'             => [
 | |
|                                 [ 'remove_between_regex', '\{[^$]', '}' ],
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
| ####'Pascal'             => [
 | |
| ####                            [ 'call_regexp_common'  , 'Pascal' ],
 | |
| ####                            [ 'remove_matches'      , '^\s*//' ],
 | |
| ####                        ],
 | |
|     'Pascal/Puppet'            => [ [ 'die' ,          ], ], # never called
 | |
|     'Puppet'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'   ],
 | |
|                                 [ 'call_regexp_common'  , 'C'       ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'P4'                 => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Patran Command Language'=> [
 | |
|                                 [ 'remove_matches'      , '^\s*#'   ],
 | |
|                                 [ 'remove_matches'      , '^\s*\$#' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'       ],
 | |
|                             ],
 | |
|     'Perl'               => [   [ 'remove_below'        , '^__(END|DATA)__'],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_below_above'  , '^=head1', '^=cut'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'PEG'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'peg.js'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'peggy'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Pest'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*//'  ],
 | |
|                                 [ 'remove_inline'       , '//.*$'   ],
 | |
|                             ],
 | |
|     'Perl/Prolog'        => [ [ 'die' ,          ], ], # never called
 | |
|     'PL/I'               => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'PL/M'               => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'PlantUML'           => [
 | |
|                                 [ 'remove_between_general', "/'", "'/" ],
 | |
|                                 [ 'remove_matches'      , "^\\s*'" ],
 | |
|                             ],
 | |
|     'Pig Latin'          => [
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                                 [ 'call_regexp_common'  , 'C'       ],
 | |
|                             ],
 | |
|     'Prisma Schema'      => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
|     'Processing'         => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'ProGuard'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'PO File'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*#[^,]' ],  # '#,' is not a comment
 | |
|                             ],
 | |
|     'Pony'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'PowerBuilder'       => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'PowerShell'         => [
 | |
|                                 [ 'powershell_to_C'                ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Prolog'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*\%' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '(//|\%).*$' ],
 | |
|                             ],
 | |
|     'Properties'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*!'  ],
 | |
|                             ],
 | |
|     'Protocol Buffers'   => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'PRQL'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                             ],
 | |
|     'Pug'                => [
 | |
|                                 [ 'remove_pug_block'    ,          ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'PureScript'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_between_general', '{-', '-}' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'Python'             => [
 | |
|                                 [ 'remove_matches'      , '/\*'    ],
 | |
|                                 [ 'remove_matches'      , '\*/'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Jupyter Notebook'   => [   # these are JSON files; have no comments
 | |
|                                 # would have to parse JSON for
 | |
|                                 #      "cell_type": "code"
 | |
|                                 # to count code lines
 | |
|                                 [ 'jupyter_nb'                     ],
 | |
|                                 [ 'remove_matches'      , '^\s*$'  ],
 | |
|                             ],
 | |
|     'PHP'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'QML'                => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                             ],
 | |
|     'Qt'                 => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Qt Linguist'        => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Qt Project'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'R'                  => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Ring'               => [
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Rmd'                => [
 | |
|                                 [ 'reduce_to_rmd_code_blocks'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Racket'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'Raku'               => [   [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_below_above'  , '^=head1', '^=cut'  ],
 | |
|                                 [ 'remove_below_above'  , '^=begin', '^=end'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Raku/Prolog'        => [ [ 'die' ,          ], ], # never called
 | |
|     'RAML'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'RapydScript'        => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Razor'              => [
 | |
|                                 [ 'remove_between_general', '@*', '*@' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_between_general', '<!--', '-->' ],
 | |
|                             ],
 | |
|     'reStructuredText'   => [
 | |
|                                 [ 'remove_between_regex', '^\.\.', '^[^ \n\t\r\f\.]' ]
 | |
|                             ],
 | |
|     'Rexx'               => [   [ 'call_regexp_common'  , 'C'      ], ],
 | |
|     'ReasonML'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'ReScript'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'RobotFramework'     => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'   ],
 | |
|                                 [ 'remove_matches'      , '^\s*Comment' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
 | |
|                                 [ 'remove_matches'      , '^\s*\[(Documentation|Tags)\]' ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Ruby'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_below_above'  , '^=begin', '^=end' ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Ruby HTML'          => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'Circom'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Cairo'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Rust'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'SaltStack'          => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'SAS'                => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_between_general', '*', ';' ],
 | |
|                             ],
 | |
|     'Sass'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Scala'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Scheme/SaltStack' => [ [ 'die' ,          ], ], # never called
 | |
|     'Scheme'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , ';.*$'   ],
 | |
|                             ],
 | |
|     'SCSS'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'sed'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Slice'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Slim'               => [
 | |
|                                 [ 'remove_slim_block'   ,          ],
 | |
|                             ],
 | |
|     'Slint'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'SKILL'              => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                             ],
 | |
|     'SKILL++'            => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                             ],
 | |
|     'Squirrel'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Starlark'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'BizTalk Pipeline' =>   [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'BizTalk Orchestration' => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Solidity'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'SparForte'          => [
 | |
|                                 [ 'remove_matches'      , '^\s*#!' ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                             ],
 | |
|     'Specman e'          => [
 | |
|                                 [ 'pre_post_fix'        , "'>", "<'"],
 | |
|                                 [ 'remove_between_general', "^'>", "^<'" ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++',   ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'rm_last_line'        , ],  # undo pre_post_fix addition
 | |
|                                                               # of trailing line of just <'
 | |
|                             ],
 | |
|     'SQL'                => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'SQL Stored Procedure'=> [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'SQL Data'           => [
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'Smalltalk'          => [
 | |
|                                 [ 'call_regexp_common'  , 'Smalltalk'      ],
 | |
|                             ],
 | |
|     'Smarty'             => [
 | |
|                                 [ 'smarty_to_C'                    ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'Snakemake'          => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Standard ML'        => [
 | |
|                                 [ 'remove_between_general', '(*', '*)' ],
 | |
|                             ],
 | |
|     'Stata'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Stylus'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'SugarSS'            => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Svelte'             => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'SVG'                => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Swift'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'SWIG'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
| 
 | |
|     'm4'                 => [   [ 'remove_matches'      , '^dnl\s'  ], ],
 | |
|     'C Shell'            => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Kermit'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*;'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Korn Shell'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'TableGen'           => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Tcl/Tk'             => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'TEAL'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                             ],
 | |
|     'Teamcenter met'     => [   [ 'call_regexp_common'  , 'C'      ], ],
 | |
|     'Teamcenter mth'     => [   [ 'remove_matches'      , '^\s*#'  ], ],
 | |
|     'TeX'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*%'  ],
 | |
|                                 [ 'remove_inline'       , '%.*$'   ],
 | |
|                             ],
 | |
|     'Text'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*$'  ],
 | |
|                             ],
 | |
|     'TLA+'               => [
 | |
|                                 [ 'remove_TLAPlus_generated_code'                 ],
 | |
|                                 [ 'remove_matches'               , '^\\s*\\\\\\*' ],
 | |
|                                 [ 'remove_TLAPlus_comments'                       ],
 | |
|                             ],
 | |
|     'Thrift'             => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Titanium Style Sheet'  => [
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_inline'       , '//.*$'  ],
 | |
|                                 [ 'remove_between_regex', '/[^/]', '[^/]/' ],
 | |
|                             ],
 | |
|     'TNSDL'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'      ],
 | |
|                             ],
 | |
|     'TOML'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'TTCN'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'      ],
 | |
|                             ],
 | |
|     'TITAN Project File Information'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'tspeg'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Twig'               => [
 | |
|                                 [ 'remove_between_general', '{#', '#}' ],
 | |
|                             ],
 | |
|     'TypeScript'         => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Typst'              => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Umka'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Unity-Prefab'       => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Visual Fox Pro'     =>  [
 | |
|                                 [ 'remove_matches'      , '^\s*\*' ],
 | |
|                                 [ 'remove_inline'       , '\*.*$'  ],
 | |
|                                 [ 'remove_matches'      , '^\s*&&' ],
 | |
|                                 [ 'remove_inline'       , '&&.*$'  ],
 | |
|                             ],
 | |
|     'Softbridge Basic'   => [   [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Attribute\s+'],
 | |
|                                 [ 'remove_matches'      , '^\s*\47'], ],  # \47 = '
 | |
|     # http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
 | |
|     'Vala'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Vala Header'        => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Verilog-SystemVerilog/Coq' => [ ['die'] ], # never called
 | |
|     'Verilog-SystemVerilog' => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'VHDL'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*--' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_inline'       , '--.*$'  ],
 | |
|                             ],
 | |
|     'vim script'         => [
 | |
|                                 [ 'remove_matches'      , '^\s*"'  ],
 | |
|                                 [ 'remove_inline'       , '".*$'   ],
 | |
|                             ],
 | |
|     'Visual Basic Script' => [
 | |
|                                 [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Attribute\s+'],
 | |
|                                 [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | |
|                             ],
 | |
|     'Visual Basic .NET' => [
 | |
|                                 [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Attribute\s+'],
 | |
|                                 [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | |
|                             ],
 | |
|     'VB for Applications' => [
 | |
|                                 [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Attribute\s+'],
 | |
|                                 [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | |
|                             ],
 | |
|     'Visual Basic'       => [
 | |
|                                 [ 'remove_above'        , '^\s*Attribute\s+VB_Name\s+=' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Attribute\s+'],
 | |
|                                 [ 'remove_matches'      , '^\s*\47'],     # \47 = '
 | |
|                             ],
 | |
|     'Visualforce Component' => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Visualforce Page'   => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Velocity Template Language' => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'remove_jsp_comments' ,          ],
 | |
|                                 [ 'remove_matches'      , '^\s*##' ],
 | |
|                                 [ 'remove_between_general', '#**', '*#' ],
 | |
|                                 [ 'add_newlines'        ,          ],
 | |
|                             ],
 | |
|     'Vuejs Component'     => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Vyper'              => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'docstring_to_C'                 ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'Teamcenter def'     => [   [ 'remove_matches'      , '^\s*#'  ], ],
 | |
|     'Windows Module Definition' => [
 | |
|                                 [ 'remove_matches'      , '^\s*;' ],
 | |
|                                 [ 'remove_inline'       , ';.*$'  ],
 | |
|                             ],
 | |
|     'yacc'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'YAML'               => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     'lex'                => [   [ 'call_regexp_common'  , 'C'      ], ],
 | |
|     'XAML'               => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'xBase Header'       => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\&\&' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\*' ],
 | |
|                                 [ 'remove_matches'      , '^\s*NOTE' ],
 | |
|                                 [ 'remove_matches'      , '^\s*note' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Note' ],
 | |
|                                 [ 'remove_inline'       , '\&\&.*$' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'xBase'              => [
 | |
| #                               [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\&\&' ],
 | |
|                                 [ 'remove_matches'      , '^\s*\*' ],
 | |
|                                 [ 'remove_matches'      , '^\s*NOTE' ],
 | |
|                                 [ 'remove_matches'      , '^\s*note' ],
 | |
|                                 [ 'remove_matches'      , '^\s*Note' ],
 | |
|                                 [ 'remove_inline'       , '\&\&.*$' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'MXML'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                                 [ 'remove_matches'      , '^\s*//' ],
 | |
|                                 [ 'add_newlines'        ,          ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'Web Services Description' => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'WebAssembly'           => [
 | |
|                                 [ 'remove_matches'      , '^\s*;;' ],
 | |
|                             ],
 | |
|     'WGSL'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Windows Message File'  => [
 | |
|                                 [ 'remove_matches'      , '^\s*;\s*//' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'          ],
 | |
|                                 [ 'remove_matches'      , '^\s*;\s*$'  ],
 | |
| #                               next line only hypothetical
 | |
| #                               [ 'remove_matches_2re'  , '^\s*;\s*/\*',
 | |
| #                                                         '^\s*;\s*\*/', ],
 | |
|                             ],
 | |
|     'Windows Resource File' => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'WiX source'         => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'WiX include'        => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'WiX string localization' => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'WXML'               => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'WXSS'               => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C'      ],
 | |
|                             ],
 | |
|     'X++'                => [
 | |
|                                 [ 'remove_matches', '\s*#\s*//' ],
 | |
|                                 [ 'remove_between_regex', '#\s*/\*', '\*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'XMI'                => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'XML'                => [
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'XQuery'             => [
 | |
|                                 [ 'remove_between_general', '(:', ':)' ],
 | |
|                             ],
 | |
|     'XSD'                => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'XSLT'               => [   [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'Xtend'              => [   # copy of Java, plus triple << inline
 | |
|                                 # separate /* inside quoted strings with two
 | |
|                                 # concatenated strings split between / and *
 | |
|                                 [ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
 | |
|                                   '(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                                 [ 'remove_matches'      , '^\s*\x{c2ab}{3}'  ], # doesn't work
 | |
|                                 # \xCA2B is unicode << character
 | |
|                             ],
 | |
|     'NAnt script'       => [    [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'MSBuild script'    => [    [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ], ],
 | |
|     'Visual Studio Module' => [
 | |
|                                 [ 'rm_comments_in_strings', '"', '/*', '*/' ],
 | |
|                                 [ 'rm_comments_in_strings', '"', '//', '' ],
 | |
|                                 [ 'call_regexp_common'  , 'C++'    ],
 | |
|                             ],
 | |
|     'Visual Studio Solution' => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_html_comments',          ],
 | |
|                                 [ 'call_regexp_common'  , 'HTML'   ],
 | |
|                             ],
 | |
|     'Zig'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*//'  ],
 | |
|                                 [ 'remove_inline'       , '//.*$'   ],
 | |
|                             ],
 | |
|     'zsh'                => [
 | |
|                                 [ 'remove_matches'      , '^\s*#'  ],
 | |
|                                 [ 'remove_inline'       , '#.*$'   ],
 | |
|                             ],
 | |
|     );
 | |
| # 1}}}
 | |
| %{$rh_EOL_continuation_re} = (               # {{{1
 | |
|     'ActionScript'       =>     '\\\\$'         ,
 | |
|     'AspectJ'            =>     '\\\\$'         ,
 | |
|     'Assembly'           =>     '\\\\$'         ,
 | |
|     'ASP'                =>     '\\\\$'         ,
 | |
|     'ASP.NET'            =>     '\\\\$'         ,
 | |
|     'Ada'                =>     '\\\\$'         ,
 | |
|     'awk'                =>     '\\\\$'         ,
 | |
|     'bc'                 =>     '\\\\$'         ,
 | |
|     'C'                  =>     '\\\\$'         ,
 | |
|     'C++'                =>     '\\\\$'         ,
 | |
|     'C/C++ Header'       =>     '\\\\$'         ,
 | |
|     'CMake'              =>     '\\\\$'         ,
 | |
|     'Cython'             =>     '\\\\$'         ,
 | |
|     'C#'                 =>     '\\\\$'         ,
 | |
|     'C# Designer'        =>     '\\\\$'         ,
 | |
|     'Cake Build Script'  =>     '\\\\$'         ,
 | |
|     'D'                  =>     '\\\\$'         ,
 | |
|     'Dart'               =>     '\\\\$'         ,
 | |
|     'Expect'             =>     '\\\\$'         ,
 | |
|     'Futhark'            =>     '\\\\$'         ,
 | |
|     'Gencat NLS'         =>     '\\\\$'         ,
 | |
|     'Go'                 =>     '\\\\$'         ,
 | |
|     'IDL'                =>     '\$\\$'         ,
 | |
|     'Igor Pro'           =>     '\\$'           ,
 | |
| #   'Java'               =>     '\\\\$'         ,
 | |
|     'JavaScript'         =>     '\\\\$'         ,
 | |
|     'JSON5'              =>     '\\\\$'         ,
 | |
|     'JSX'                =>     '\\\\$'         ,
 | |
|     'LESS'               =>     '\\\\$'         ,
 | |
|     'Lua'                =>     '\\\\$'         ,
 | |
|     'Luau'               =>     '\\\\$'         ,
 | |
|     'make'               =>     '\\\\$'         ,
 | |
|     'MATLAB'             =>     '\.\.\.\s*$'    ,
 | |
|     'Meson'              =>     '\\\\$'         ,
 | |
|     'Metal'              =>     '\\\\$'         ,
 | |
|     'MXML'               =>     '\\\\$'         ,
 | |
|     'Objective-C'        =>     '\\\\$'         ,
 | |
|     'Objective-C++'      =>     '\\\\$'         ,
 | |
|     'OCaml'              =>     '\\\\$'         ,
 | |
|     'Octave'             =>     '\.\.\.\s*$'    ,
 | |
|     'Qt Project'         =>     '\\\\$'         ,
 | |
|     'Patran Command Language'=> '\\\\$'         ,
 | |
|     'PowerBuilder'       =>     '\\\\$'         ,
 | |
|     'PowerShell'         =>     '\\\\$'         ,
 | |
|     'Python'             =>     '\\\\$'         ,
 | |
|     'R'                  =>     '\\\\$'         ,
 | |
|     'Rmd'                =>     '\\\\$'         ,
 | |
|     'Ruby'               =>     '\\\\$'         ,
 | |
|     'sed'                =>     '\\\\$'         ,
 | |
|     'Swift'              =>     '\\\\$'         ,
 | |
|     'Bourne Again Shell' =>     '\\\\$'         ,
 | |
|     'Bourne Shell'       =>     '\\\\$'         ,
 | |
|     'C Shell'            =>     '\\\\$'         ,
 | |
|     'kvlang'             =>     '\\\\$'         ,
 | |
|     'Kermit'             =>     '\\\\$'         ,
 | |
|     'Korn Shell'         =>     '\\\\$'         ,
 | |
|     'Slint'              =>     '\\\\$'         ,
 | |
|     'Snakemake'          =>     '\\\\$'         ,
 | |
|     'Starlark'           =>     '\\\\$'         ,
 | |
|     'Solidity'           =>     '\\\\$'         ,
 | |
|     'Stata'              =>     '///$'          ,
 | |
|     'Stylus'             =>     '\\\\$'         ,
 | |
|     'Tcl/Tk'             =>     '\\\\$'         ,
 | |
|     'TTCN'               =>     '\\\\$'         ,
 | |
|     'TypeScript'         =>     '\\\\$'         ,
 | |
|     'lex'                =>     '\\\\$'         ,
 | |
|     'Vala'               =>     '\\\\$'         ,
 | |
|     'Vala Header'        =>     '\\\\$'         ,
 | |
|     'Vyper'              =>     '\\\\$'         ,
 | |
|     'X++'                =>     '\\\\$'         ,
 | |
|     'zsh'                =>     '\\\\$'         ,
 | |
|     );
 | |
| # 1}}}
 | |
| %{$rh_Not_Code_Extension}    = (             # {{{1
 | |
|    '1'             => 1,  # Man pages (documentation):
 | |
|    '2'             => 1,
 | |
|    '3'             => 1,
 | |
|    '4'             => 1,
 | |
|    '5'             => 1,
 | |
|    '6'             => 1,
 | |
|    '7'             => 1,
 | |
|    '8'             => 1,
 | |
|    '9'             => 1,
 | |
|    'a'             => 1,  # Static object code.
 | |
|    'ad'            => 1,  # X application default resource file.
 | |
|    'afm'           => 1,  # font metrics
 | |
|    'arc'           => 1,  # arc(1) archive
 | |
|    'arj'           => 1,  # arj(1) archive
 | |
|    'au'            => 1,  # Audio sound filearj(1) archive
 | |
|    'bak'           => 1,  # Backup files - we only want to count the "real" files.
 | |
|    'bdf'           => 1,
 | |
|    'bmp'           => 1,
 | |
|    'bz2'           => 1,  # bzip2(1) compressed file
 | |
|    'csv'           => 1,
 | |
|    'desktop'       => 1,
 | |
|    'dic'           => 1,
 | |
|    'doc'           => 1,
 | |
|    'elc'           => 1,
 | |
|    'eps'           => 1,
 | |
|    'fig'           => 1,
 | |
|    'gif'           => 1,
 | |
|    'gz'            => 1,
 | |
|    'h5'            => 1,  # hierarchical data format
 | |
|    'hdf'           => 1,  # hierarchical data format
 | |
|    'in'            => 1,  # Debatable.
 | |
|    'jpg'           => 1,
 | |
|    'kdelnk'        => 1,
 | |
|    'man'           => 1,
 | |
|    'mf'            => 1,
 | |
|    'mp3'           => 1,
 | |
|    'n'             => 1,
 | |
|    'o'             => 1,  # Object code is generated from source code.
 | |
|    'o.cmd'         => 1,  # not DOS Batch; Linux kernel compilation optimization file
 | |
|    'o.d'           => 1,  # cmake object dependency file
 | |
|    'pbm'           => 1,
 | |
|    'pdf'           => 1,
 | |
|    'pfb'           => 1,
 | |
|    'png'           => 1,
 | |
|    'ppt'           => 1,
 | |
|    'pptx'          => 1,
 | |
|    'ps'            => 1,  # Postscript is _USUALLY_ generated automatically.
 | |
|    'sgm'           => 1,
 | |
|    'sgml'          => 1,
 | |
|    'so'            => 1,  # Dynamically-loaded object code.
 | |
|    'Tag'           => 1,
 | |
|    'tfm'           => 1,
 | |
|    'tgz'           => 1,  # gzipped tarball
 | |
|    'tiff'          => 1,
 | |
|    'tsv'           => 1,  # tab separated values
 | |
|    'vf'            => 1,
 | |
|    'wav'           => 1,
 | |
|    'xbm'           => 1,
 | |
|    'xls'           => 1,
 | |
|    'xlsx'          => 1,
 | |
|    'xpm'           => 1,
 | |
|    'Y'             => 1,  # file compressed with "Yabba"
 | |
|    'Z'             => 1,  # file compressed with "compress"
 | |
|    'zip'           => 1,  # zip archive
 | |
|    'gitattributes' => 1,
 | |
|    'gitignore'     => 1,
 | |
|    'gitmodules'    => 1,
 | |
| ); # 1}}}
 | |
| %{$rh_Not_Code_Filename}     = (             # {{{1
 | |
|    'AUTHORS'     => 1,
 | |
|    'BUGS'        => 1,
 | |
|    'BUGS'        => 1,
 | |
|    'Changelog'   => 1,
 | |
|    'ChangeLog'   => 1,
 | |
|    'ChangeLog'   => 1,
 | |
|    'Changes'     => 1,
 | |
|    'CHANGES'     => 1,
 | |
|    'COPYING'     => 1,
 | |
|    'COPYING'     => 1,
 | |
|    'DESCRIPTION' => 1, # R packages metafile
 | |
|    '.cvsignore'  => 1,
 | |
|    'Entries'     => 1,
 | |
|    'FAQ'         => 1,
 | |
|    'INSTALL'     => 1,
 | |
|    'MAINTAINERS' => 1,
 | |
|    'MD5SUMS'     => 1,
 | |
|    'NAMESPACE'   => 1, # R packages metafile
 | |
|    'NEWS'        => 1,
 | |
|    'readme'      => 1,
 | |
|    'Readme'      => 1,
 | |
|    'README'      => 1,
 | |
|    'README.tk'   => 1, # used in kdemultimedia, it's confusing.
 | |
|    'Repository'  => 1,
 | |
|    'Root'        => 1, # CVS
 | |
|    'TODO'        => 1,
 | |
| );
 | |
| # 1}}}
 | |
| %{$rh_Scale_Factor}          = (             # {{{1
 | |
|     '(unknown)'                    =>   0.00,
 | |
|     '1st generation default'       =>   0.25,
 | |
|     '2nd generation default'       =>   0.75,
 | |
|     '3rd generation default'       =>   1.00,
 | |
|     '4th generation default'       =>   4.00,
 | |
|     '5th generation default'       =>  16.00,
 | |
|     'ABAP'                         =>   5.00,
 | |
|     'ActionScript'                 =>   1.36,
 | |
|     'Ada'                          =>   0.52,
 | |
|     'ADSO/IDSM'                    =>   3.00,
 | |
|     'Agda'                         =>   2.11,
 | |
|     'AMPLE'                        =>   2.00,
 | |
|     'AnsProlog'                    =>   1.25,
 | |
|     'Ant/XML'                      =>   1.90,
 | |
|     'Ant'                          =>   1.90,
 | |
|     'ANTLR Grammar'                =>   2.00,
 | |
|     'SQL'                          =>   6.15,
 | |
|     'SQL Stored Procedure'         =>   6.15,
 | |
|     'SQL Data'                     =>   1.00,
 | |
|     'Apex Class'                   =>   1.50,
 | |
|     'APL'                          =>   2.50,
 | |
|     'ArkTs'                        =>   2.50,
 | |
|     'Arturo'                       =>   4.00,
 | |
|     'AsciiDoc'                     =>   1.50,
 | |
|     'AspectJ'                      =>   1.36,
 | |
|     'asa'                          =>   1.29,
 | |
|     'ASP'                          =>   1.29,
 | |
|     'ASP.NET'                      =>   1.29,
 | |
|     'CCS'                          =>   5.33,
 | |
|     'Apex Trigger'                 =>   1.4 ,
 | |
|     'AppleScript'                  =>   4.0 ,
 | |
|     'Arduino Sketch'               =>   1.00,
 | |
|     'Assembly'                     =>   0.25,
 | |
|     'Astro'                        =>   3.0,
 | |
|     'Asymptote'                    =>   2.50,
 | |
|     'autocoder'                    =>   0.25,
 | |
|     'AutoHotkey'                   =>   1.29,
 | |
|     'awk'                          =>   3.81,
 | |
|     'basic'                        =>   0.75,
 | |
|     'Bazel'                        =>   1.00,
 | |
|     'bc'                           =>   1.50,
 | |
|     'Blade'                        =>   2.00,
 | |
|     'bliss'                        =>   0.75,
 | |
|     'bmsgen'                       =>   2.22,
 | |
|     'bteq'                         =>   6.15,
 | |
|     'Brainfuck'                    =>   0.10,
 | |
|     'BrightScript'                 =>   2.00,
 | |
|     'builder'                      =>   2.00,
 | |
|     'C'                            =>   0.77,
 | |
|     'c set 2'                      =>   0.88,
 | |
|     'C#'                           =>   1.36,
 | |
|     'C# Designer'                  =>   1.36,
 | |
|     'C# Generated'                 =>   1.36,
 | |
|     'Cake Build Script'            =>   1.36,
 | |
|     'C++'                          =>   1.51,
 | |
|     'Carbon'                       =>   1.51,
 | |
|     'ColdFusion'                   =>   4.00,
 | |
|     'ColdFusion CFScript'          =>   4.00,
 | |
|     'Chapel'                       =>   2.96,
 | |
|     'Clean'                        =>   2.50,
 | |
|     'Clojure'                      =>   1.25,
 | |
|     'ClojureScript'                =>   1.25,
 | |
|     'ClojureC'                     =>   1.25,
 | |
|     'CMake'                        =>   1.00,
 | |
|     'COBOL'                        =>   1.04,
 | |
|     'CoCoA 5'                      =>   1.04,
 | |
|     'CoffeeScript'                 =>   2.00,
 | |
|     'Constraint Grammar'           =>   4.00,
 | |
|     'Containerfile'                =>   2.00,
 | |
|     'Coq'                          =>   5.00,
 | |
|     'Crystal'                      =>   2.50,
 | |
|     'Crystal Reports'              =>   4.00,
 | |
|     'csl'                          =>   1.63,
 | |
|     'CSON'                         =>   2.50,
 | |
|     'csp'                          =>   1.51,
 | |
|     'cssl'                         =>   1.74,
 | |
|     'CSS'                          =>   1.0,
 | |
|     'CSV'                          =>   0.1,
 | |
|     'Cucumber'                     =>   3.00,
 | |
|     'CUDA'                         =>   1.00,
 | |
|     'D'                            =>   1.70,
 | |
|     'Dafny'                        =>   3.00,
 | |
|     'DAL'                          =>   1.50,
 | |
|     'Dart'                         =>   2.00,
 | |
|     'DenizenScript'                =>   1.00,
 | |
|     'Delphi Form'                  =>   2.00,
 | |
|     'DIET'                         =>   2.00,
 | |
|     'diff'                         =>   1.00,
 | |
|     'Derw'                         =>   3.00,
 | |
|     'dhall'                        =>   2.11,
 | |
|     'DITA'                         =>   1.90,
 | |
|     'dtrace'                       =>   2.00,
 | |
|     'NASTRAN DMAP'                 =>   2.35,
 | |
|     'DOORS Extension Language'     =>   1.50,
 | |
|     'Dockerfile'                   =>   2.00,
 | |
|     'DOS Batch'                    =>   0.63,
 | |
|     'Drools'                       =>   2.00,
 | |
|     'ECPP'                         =>   1.90,
 | |
|     'eda/sql'                      =>   6.67,
 | |
|     'edscheme 3.4'                 =>   1.51,
 | |
|     'EEx'                          =>   2.00,
 | |
|     'EJS'                          =>   2.50,
 | |
|     'Elixir'                       =>   2.11,
 | |
|     'Elm'                          =>   2.50,
 | |
|     'Embedded Crystal'             =>   2.00,
 | |
|     'ERB'                          =>   2.00,
 | |
|     'Erlang'                       =>   2.11,
 | |
|     'Fennel'                       =>   2.50,
 | |
|     'Finite State Language'        =>   2.00,
 | |
|     'Focus'                        =>   1.90,
 | |
|     'Forth'                        =>   1.25,
 | |
|     'Fortran 66'                   =>   0.63,
 | |
|     'Fortran 77'                   =>   0.75,
 | |
|     'Fortran 90'                   =>   1.00,
 | |
|     'Fortran 95'                   =>   1.13,
 | |
|     'Fortran II'                   =>   0.63,
 | |
|     'foundation'                   =>   2.76,
 | |
|     'Freemarker Template'          =>   1.48,
 | |
|     'Futhark'                      =>   3.00, # Guessed from value of ML
 | |
|     'F#'                           =>   2.50,
 | |
|     'F# Script'                    =>   2.50,
 | |
|     'Flatbuffers'                  =>   2.50,
 | |
|     'Glade'                        =>   2.00,
 | |
|     'Gleam'                        =>   2.50,
 | |
|     'GLSL'                         =>   2.00,
 | |
|     'Glimmer JavaScript'           =>   3.50,
 | |
|     'Glimmer TypeScript'           =>   3.50,
 | |
|     'gml'                          =>   1.74,
 | |
|     'gpss'                         =>   1.74,
 | |
|     'guest'                        =>   2.86,
 | |
|     'guru'                         =>   1.63,
 | |
|     'GDScript'                     =>   2.50,
 | |
|     'Godot Scene'                  =>   2.50,
 | |
|     'Godot Resource'               =>   2.50,
 | |
|     'Godot Shaders'                =>   2.50,
 | |
|     'Go'                           =>   2.50,
 | |
|     'Gradle'                       =>   4.00,
 | |
|     'Grails'                       =>   1.48,
 | |
|     'GraphQL'                      =>   4.00,
 | |
|     'Groovy'                       =>   4.10,
 | |
|     'gw basic'                     =>   0.82,
 | |
|     'HCL'                          =>   2.50,
 | |
|     'high c'                       =>   0.63,
 | |
|     'hlevel'                       =>   1.38,
 | |
|     'hp basic'                     =>   0.63,
 | |
|     'Haml'                         =>   2.50,
 | |
|     'Handlebars'                   =>   2.50,
 | |
|     'Harbour'                      =>   2.00,
 | |
|     'Hare'                         =>   2.50,
 | |
|     'Haskell'                      =>   2.11,
 | |
|     'Haxe'                         =>   2.00,
 | |
|     'HolyC'                        =>   2.50,
 | |
|     'Hoon'                         =>   2.00,
 | |
|     'HTML'                         =>   1.90,
 | |
|     'HTML EEx'                     =>   3.00,
 | |
|     'XHTML'                        =>   1.90,
 | |
|     'XMI'                          =>   1.90,
 | |
|     'XML'                          =>   1.90,
 | |
|     'FXML'                         =>   1.90,
 | |
|     'MXML'                         =>   1.90,
 | |
|     'XSLT'                         =>   1.90,
 | |
|     'DTD'                          =>   1.90,
 | |
|     'XSD'                          =>   1.90,
 | |
|     'NAnt script'                  =>   1.90,
 | |
|     'MSBuild script'               =>   1.90,
 | |
|     'Visual Studio Module'         =>   1.00,
 | |
|     'Visual Studio Solution'       =>   1.00,
 | |
|     'HLSL'                         =>   2.00,
 | |
|     'Idris'                        =>   2.00,
 | |
|     'Literate Idris'               =>   2.00,
 | |
|     'Igor Pro'                     =>   4.00,
 | |
|     'Imba'                         =>   3.00,
 | |
|     'INI'                          =>   1.00,
 | |
|     'InstallShield'                =>   1.90,
 | |
|     'IPL'                          =>   2.00,
 | |
|     'Jai'                          =>   1.13,
 | |
|     'Jam'                          =>   2.00,
 | |
|     'Janet'                        =>   3.00,
 | |
|     'Java'                         =>   1.36,
 | |
|     'JavaScript'                   =>   1.48,
 | |
|     'JavaServer Faces'             =>   1.5 ,
 | |
|     'Jinja Template'               =>   1.5 ,
 | |
|     'JSON'                         =>   2.50,
 | |
|     'JSON5'                        =>   2.50,
 | |
|     'JSP'                          =>   1.48,
 | |
|     'JSX'                          =>   1.48,
 | |
|     'Velocity Template Language'   =>   1.00,
 | |
|     'JCL'                          =>   1.67,
 | |
|     'Juniper Junos'                =>   2.00,
 | |
|     'kvlang'                       =>   2.00,
 | |
|     'Kermit'                       =>   2.00,
 | |
|     'Korn Shell'                   =>   3.81,
 | |
|     'Kotlin'                       =>   2.00,
 | |
|     'Lean'                         =>   3.00,
 | |
|     'LESS'                         =>   1.50,
 | |
|     'Lem'                          =>   3.00,
 | |
|     'LFE'                          =>   1.25,
 | |
|     'Linker Script'                =>   1.00,
 | |
|     'liquid'                       =>   3.00,
 | |
|     'Lisp'                         =>   1.25,
 | |
|     'LiveLink OScript'             =>   3.5 ,
 | |
|     'LLVM IR'                      =>   0.90,
 | |
|     'Logos'                        =>   2.00,
 | |
|     'Logtalk'                      =>   2.00,
 | |
|     'm4'                           =>   1.00,
 | |
|     'make'                         =>   2.50,
 | |
|     'Mako'                         =>   1.50,
 | |
|     'Markdown'                     =>   1.00,
 | |
|     'mathcad'                      =>  16.00,
 | |
|     'Maven'                        =>   1.90,
 | |
|     'Meson'                        =>   1.00,
 | |
|     'Metal'                        =>   1.51,
 | |
|     'Modelica'                     =>   2.00,
 | |
|     'MUMPS'                        =>   4.21,
 | |
|     'Mustache'                     =>   1.75,
 | |
|     'Nastran'                      =>   1.13,
 | |
|     'Nemerle'                      =>   2.50,
 | |
|     'NetLogo'                      =>   4.00,
 | |
|     'Nickel'                       =>   2.00,
 | |
|     'Nim'                          =>   2.00,
 | |
|     'Nix'                          =>   2.70,
 | |
|     'Nunjucks'                     =>   1.5 ,
 | |
|     'Objective-C'                  =>   2.96,
 | |
|     'Objective-C++'                =>   2.96,
 | |
|     'OCaml'                        =>   3.00,
 | |
|     'Odin'                         =>   2.00,
 | |
|     'OpenSCAD'                     =>   1.00,
 | |
|     'Oracle Reports'               =>   2.76,
 | |
|     'Oracle Forms'                 =>   2.67,
 | |
|     'Oracle Developer/2000'        =>   3.48,
 | |
|     'Other'                        =>   1.00,
 | |
|     'P4'                           =>   1.5 ,
 | |
|     'Pascal'                       =>   0.88,
 | |
|     'Patran Command Language'      =>   2.50,
 | |
|     'Perl'                         =>   4.00,
 | |
|     'PEG'                          =>   3.00,
 | |
|     'peg.js'                       =>   3.00,
 | |
|     'peggy'                        =>   3.00,
 | |
|     'Pest'                         =>   2.00,
 | |
|     'tspeg'                        =>   3.00,
 | |
|     'Pig Latin'                    =>   1.00,
 | |
|     'PL/I'                         =>   1.38,
 | |
|     'PL/M'                         =>   1.13,
 | |
|     'PlantUML'                     =>   2.00,
 | |
|     'Oracle PL/SQL'                =>   2.58,
 | |
|     'PO File'                      =>   1.50,
 | |
|     'Pony'                         =>   3.00,
 | |
|     'PowerBuilder'                 =>   3.33,
 | |
|     'PowerShell'                   =>   3.00,
 | |
|     'Prisma Schema'                =>   2.50,
 | |
|     'Processing'                   =>   2.50,
 | |
|     'ProGuard'                     =>   2.50,
 | |
|     'Prolog'                       =>   1.25,
 | |
|     'Properties'                   =>   1.36,
 | |
|     'Protocol Buffers'             =>   2.00,
 | |
|     'PRQL'                         =>   3.00,
 | |
|     'Pug'                          =>   2.00,
 | |
|     'Puppet'                       =>   2.00,
 | |
|     'PureScript'                   =>   2.00,
 | |
|     'QML'                          =>   1.25,
 | |
|     'Qt'                           =>   2.00,
 | |
|     'Qt Linguist'                  =>   1.00,
 | |
|     'Qt Project'                   =>   1.00,
 | |
|     'R'                            =>   3.00,
 | |
|     'Rmd'                          =>   3.00,
 | |
|     'Racket'                       =>   1.50,
 | |
|     'Raku'                         =>   4.00,
 | |
|     'rally'                        =>   2.00,
 | |
|     'ramis ii'                     =>   2.00,
 | |
|     'RAML'                         =>   0.90,
 | |
|     'ReasonML'                     =>   2.50,
 | |
|     'ReScript'                     =>   2.50,
 | |
|     'reStructuredText'             =>   1.50,
 | |
|     'Razor'                        =>   2.00,
 | |
|     'Rexx'                         =>   1.19,
 | |
|     'Ring'                         =>   4.20,
 | |
|     'RobotFramework'               =>   2.50,
 | |
|     'Circom'                       =>   1.00,
 | |
|     'Cairo'                        =>   1.00,
 | |
|     'Rust'                         =>   1.00,
 | |
|     'sas'                          =>   1.95,
 | |
|     'Scala'                        =>   4.10,
 | |
|     'Scheme'                       =>   1.51,
 | |
|     'Slim'                         =>   3.00,
 | |
|     'Solidity'                     =>   1.48,
 | |
|     'Bourne Shell'                 =>   3.81,
 | |
|     'Bourne Again Shell'           =>   3.81,
 | |
|     'ksh'                          =>   3.81,
 | |
|     'zsh'                          =>   3.81,
 | |
|     'Fish Shell'                   =>   3.81,
 | |
|     'C Shell'                      =>   3.81,
 | |
|     'SaltStack'                    =>   2.00,
 | |
|     'SAS'                          =>   1.5 ,
 | |
|     'Sass'                         =>   1.5 ,
 | |
|     'SCSS'                         =>   1.5 ,
 | |
|     'SKILL'                        =>   2.00,
 | |
|     'SKILL++'                      =>   2.00,
 | |
|     'Slice'                        =>   1.50,
 | |
|     'Slint'                        =>   1.00,
 | |
|     'Smalltalk'                    =>   4.00,
 | |
|     'Smarty'                       =>   3.50,
 | |
|     'Softbridge Basic'             =>   2.76,
 | |
|     'SparForte'                    =>   3.80,
 | |
|     'sps'                          =>   0.25,
 | |
|     'spss'                         =>   2.50,
 | |
|     'Specman e'                    =>   2.00,
 | |
|     'SQL'                          =>   2.29,
 | |
|     'Squirrel'                     =>   2.50,
 | |
|     'Standard ML'                  =>   3.00,
 | |
|     'Stata'                        =>   3.00,
 | |
|     'Stylus'                       =>   1.48,
 | |
|     'SugarSS'                      =>   2.50,
 | |
|     'Svelte'                       =>   2.00,
 | |
|     'SVG'                          =>   1.00,
 | |
|     'Swift'                        =>   2.50,
 | |
|     'SWIG'                         =>   2.50,
 | |
|     'TableGen'                     =>   2.00,
 | |
|     'Tcl/Tk'                       =>   4.00,
 | |
|     'TEAL'                         =>   0.50,
 | |
|     'Teamcenter def'               =>   1.00,
 | |
|     'Teamcenter met'               =>   1.00,
 | |
|     'Teamcenter mth'               =>   1.00,
 | |
|     'TeX'                          =>   1.50,
 | |
|     'Text'                         =>   0.50,
 | |
|     'Thrift'                       =>   2.50,
 | |
|     'TLA+'                         =>   1.00,
 | |
|     'Titanium Style Sheet'         =>   2.00,
 | |
|     'TOML'                         =>   2.76,
 | |
|     'Twig'                         =>   2.00,
 | |
|     'TNSDL'                        =>   2.00,
 | |
|     'TTCN'                         =>   2.00,
 | |
|     'TITAN Project File Information' =>   1.90,
 | |
|     'TypeScript'                   =>   2.00,
 | |
|     'Typst'                        =>   3.00,
 | |
|     'Umka'                         =>   2.00,
 | |
|     'Unity-Prefab'                 =>   2.50,
 | |
|     'Vala'                         =>   1.50,
 | |
|     'Vala Header'                  =>   1.40,
 | |
|     'Verilog-SystemVerilog'        =>   1.51,
 | |
|     'VHDL'                         =>   4.21,
 | |
|     'vim script'                   =>   3.00,
 | |
|     'Visual Basic'                 =>   2.76,
 | |
|     'VB for Applications'          =>   2.76,
 | |
|     'Visual Basic .NET'            =>   2.76,
 | |
|     'Visual Basic Script'          =>   2.76,
 | |
|     'Visual Fox Pro'               =>   4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
 | |
|     'Visualforce Component'        =>   1.9 ,
 | |
|     'Visualforce Page'             =>   1.9 ,
 | |
|     'Vuejs Component'              =>   2.00,
 | |
|     'Vyper'                        =>   4.20,
 | |
|     'Web Services Description'     =>   1.00,
 | |
|     'WebAssembly'                  =>   0.45,
 | |
|     'WGSL'                         =>   2.50,
 | |
|     'Windows Message File'         =>   1.00,
 | |
|     'Windows Resource File'        =>   1.00,
 | |
|     'Windows Module Definition'    =>   1.00,
 | |
|     'WiX source'                   =>   1.90,
 | |
|     'WiX include'                  =>   1.90,
 | |
|     'WiX string localization'      =>   1.90,
 | |
|     'WXML'                         =>   1.90,
 | |
|     'WXSS'                         =>   1.00,
 | |
|     'xBase'                        =>   2.00,
 | |
|     'xBase Header'                 =>   2.00,
 | |
|     'xlisp'                        =>   1.25,
 | |
|     'X++'                          =>   1.51, # This is a guess. Copied from C++, because the overhead for C++ headers might be equivalent to the overhead of structuring elements in XPO files
 | |
|     'XAML'                         =>   1.90,
 | |
|     'XQuery'                       =>   2.50,
 | |
|     'yacc'                         =>   1.51,
 | |
|     'yacc++'                       =>   1.51,
 | |
|     'YAML'                         =>   0.90,
 | |
|     'Expect'                       => 2.00,
 | |
|     'Gencat NLS'                   => 1.50,
 | |
|     'C/C++ Header'                 => 1.00,
 | |
|     'inc'                          => 1.00,
 | |
|     'lex'                          => 1.00,
 | |
|     'Julia'                        => 4.00,
 | |
|     'MATLAB'                       => 4.00,
 | |
|     'Mathematica'                  => 5.00,
 | |
|     'Mercury'                      => 3.00,
 | |
|     'Maven/XML'                    => 2.5,
 | |
|     'IDL'                          => 3.80,
 | |
|     'Octave'                       => 4.00,
 | |
|     'ML'                           => 3.00,
 | |
|     'Modula3'                      => 2.00,
 | |
|     'Mojom'                        => 2.00,
 | |
|     'Mojo'                         => 4.20,
 | |
|     'PHP'                          => 3.50,
 | |
|     'Jupyter Notebook'             => 4.20,
 | |
|     'Python'                       => 4.20,
 | |
|     'Snakemake'                    => 4.20,
 | |
|     'RapydScript'                  => 4.20,
 | |
|     'Starlark'                     => 4.20,
 | |
|     'BizTalk Pipeline'             => 1.00,
 | |
|     'BizTalk Orchestration'        => 1.00,
 | |
|     'Cython'                       => 3.80,
 | |
|     'Ruby'                         => 4.20,
 | |
|     'Ruby HTML'                    => 4.00,
 | |
|     'sed'                          => 4.00,
 | |
|     'Lua'                          => 4.00,
 | |
|     'Luau'                         => 4.50,
 | |
|     'OpenCL'                       => 1.50,
 | |
|     'Xtend'                        => 2.00,
 | |
|     'Zig'                          => 2.50,
 | |
|     # aggregates; value is meaningless
 | |
|     'C#/Smalltalk'                    => 1.00,
 | |
|     'D/dtrace'                        => 1.00,
 | |
|     'F#/Forth'                        => 1.00,
 | |
|     'Fortran 77/Forth'                => 1.00,
 | |
|     'Lisp/Julia'                      => 1.00,
 | |
|     'Lisp/OpenCL'                     => 1.00,
 | |
|     'PHP/Pascal/Fortran'              => 1.00,
 | |
|     'Pascal/Puppet'                   => 1.00,
 | |
|     'Perl/Prolog'                     => 1.00,
 | |
|     'Raku/Prolog'                     => 1.00,
 | |
|     'Verilog-SystemVerilog/Coq'    => 1.00,
 | |
|     'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => 1.00,
 | |
|     'IDL/Qt Project/Prolog/ProGuard'     => 1.00,
 | |
| );
 | |
| # 1}}}
 | |
| %{$rh_Known_Binary_Archives} = (             # {{{1
 | |
|             '.tar'     => 1 ,
 | |
|             '.tar.Z'   => 1 ,
 | |
|             '.tar.gz'  => 1 ,
 | |
|             '.tar.bz2' => 1 ,
 | |
|             '.zip'     => 1 ,
 | |
|             '.Zip'     => 1 ,
 | |
|             '.ZIP'     => 1 ,
 | |
|             '.ear'     => 1 ,  # Java
 | |
|             '.war'     => 1 ,  # contained within .ear
 | |
|             '.xz'      => 1 ,
 | |
|             '.whl'     => 1 ,  # Python wheel files (zip)
 | |
|             );
 | |
| # 1}}}
 | |
| } # end sub set_constants()
 | |
| sub check_scale_existence {                  # {{{1
 | |
|     # do a few sanity checks
 | |
|     my ($rhaa_Filters_by_Language,
 | |
|         $rh_Language_by_Extension,
 | |
|         $rh_Scale_Factor) = @_;
 | |
| 
 | |
|     my $OK = 1;
 | |
|     foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
 | |
|         next if defined $Extension_Collision{$language};
 | |
|         if (!defined $rh_Scale_Factor->{$language}) {
 | |
|             $OK = 0;
 | |
|             warn "Missing scale factor for $language\n";
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my %seen_it = ();
 | |
|     foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
 | |
|         my $language = $rh_Language_by_Extension->{$ext};
 | |
|         next if defined $Extension_Collision{$language};
 | |
|         next if $seen_it{$language};
 | |
|         if (!$rhaa_Filters_by_Language->{$language}) {
 | |
|             $OK = 0;
 | |
|             warn "Missing language filter for $language\n";
 | |
|         }
 | |
|         $seen_it{$language} = 1;
 | |
|     }
 | |
|     die unless $OK;
 | |
| } # 1}}}
 | |
| sub pre_post_fix {                           # {{{1
 | |
|     # Return the input lines prefixed and postfixed
 | |
|     # by the given strings.
 | |
|     my ($ra_lines, $prefix, $postfix ) = @_;
 | |
|     print "-> pre_post_fix with $prefix, $postfix\n" if $opt_v > 2;
 | |
| 
 | |
|     my $all_lines = $prefix . join(""  , @{$ra_lines}) . $postfix;
 | |
| 
 | |
|     print "<- pre_post_fix\n" if $opt_v > 2;
 | |
|     return split("\n", $all_lines);
 | |
| } # 1}}}
 | |
| sub rm_last_line {                           # {{{1
 | |
|     # Return all but the last line.
 | |
|     my ($ra_lines, ) = @_;
 | |
|     print "-> rm_last_line\n" if $opt_v > 2;
 | |
|     print "<- rm_last_line\n" if $opt_v > 2;
 | |
|     my $n = scalar(@{$ra_lines}) - 2;
 | |
|     return @{$ra_lines}[0..$n];
 | |
| } # 1}}}
 | |
| sub call_regexp_common {                     # {{{1
 | |
|     my ($ra_lines, $language ) = @_;
 | |
|     print "-> call_regexp_common for $language\n" if $opt_v > 2;
 | |
| 
 | |
|     Install_Regexp_Common() unless $HAVE_Rexexp_Common;
 | |
| 
 | |
|     my $all_lines = undef;
 | |
|     if ($language eq "C++") { # Regexp::Common's C++ comment regex is multi-line
 | |
| #       $all_lines = join("\n", @{$ra_lines});
 | |
|         $all_lines = "";
 | |
|         foreach (@{$ra_lines}) {
 | |
|             if (m/\\$/) {  # line ends with a continuation marker
 | |
|                 $all_lines .= $_;
 | |
|             } else {
 | |
|                 $all_lines .= "$_\n";
 | |
|             }
 | |
|         }
 | |
|     } else {
 | |
|         $all_lines = join(""  , @{$ra_lines});
 | |
|     }
 | |
| 
 | |
|     no strict 'vars';
 | |
|     # otherwise get:
 | |
|     #  Global symbol "%RE" requires explicit package name at cloc line xx.
 | |
|     if ($all_lines =~ $RE{comment}{$language}) {
 | |
|         # Suppress "Use of uninitialized value in regexp compilation" that
 | |
|         # pops up when $1 is undefined--happens if there's a bug in the $RE
 | |
|         # This Pascal comment will trigger it:
 | |
|         #         (* This is { another } test. **)
 | |
|         # Curiously, testing for "defined $1" breaks the substitution.
 | |
|         no warnings;
 | |
|         # Remove comments.
 | |
|         $all_lines =~ s/$1//g;
 | |
|     }
 | |
|     # a bogus use of %RE to avoid:
 | |
|     # Name "main::RE" used only once: possible typo at cloc line xx.
 | |
|     print scalar keys %RE if $opt_v < -20;
 | |
|     print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
 | |
|     print "<- call_regexp_common\n" if $opt_v > 2;
 | |
|     return split("\n", $all_lines);
 | |
| } # 1}}}
 | |
| sub plural_form {                            # {{{1
 | |
|     # For getting the right plural form on some English nouns.
 | |
|     my $n = shift @_;
 | |
|     if ($n == 1) { return ( 1, "" ); }
 | |
|     else         { return ($n, "s"); }
 | |
| } # 1}}}
 | |
| sub matlab_or_objective_C {                  # {{{1
 | |
|     # Decide if code is MATLAB, Mathematica, Objective-C, MUMPS, or Mercury
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rs_language , # out
 | |
|        ) = @_;
 | |
|     print "-> matlab_or_objective_C\n" if $opt_v > 2;
 | |
|     # matlab markers:
 | |
|     #   first line starts with "function"
 | |
|     #   some lines start with "%"
 | |
|     #   high marks for lines that start with [
 | |
|     #
 | |
|     # Objective-C markers:
 | |
|     #   must have at least two brace characters, { }
 | |
|     #   has /* ... */ style comments
 | |
|     #   some lines start with @
 | |
|     #   some lines start with #include
 | |
|     #
 | |
|     # MUMPS:
 | |
|     #   has ; comment markers
 | |
|     #   do not match:  \w+\s*=\s*\w
 | |
|     #   lines begin with   \s*\.?\w+\s+\w
 | |
|     #   high marks for lines that start with \s*K\s+ or \s*Kill\s+
 | |
|     #
 | |
|     # Mercury:
 | |
|     #   any line that begins with :- immediately triggers this
 | |
|     #
 | |
|     # Mathematica:
 | |
|     #   (* .. *)
 | |
|     #   BeginPackage
 | |
| 
 | |
|     ${$rs_language} = "";
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     my $DEBUG              = 0;
 | |
| 
 | |
|     my $matlab_points      = 0;
 | |
|     my $mathematica_points = 0;
 | |
|     my $objective_C_points = 0;
 | |
|     my $mumps_points       = 0;
 | |
|     my $mercury_points     = 0;
 | |
|     my $has_braces         = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$has_braces if $_ =~ m/[{}]/;
 | |
| #print "LINE $. has_braces=$has_braces\n";
 | |
|         ++$mumps_points if $. == 1 and m{^[A-Z]};
 | |
|         if      (m{^\s*/\*} or m {^\s*//}) {   #   /* or //
 | |
|             $objective_C_points += 5;
 | |
|             $matlab_points      -= 5;
 | |
| printf ".m:  /*|//  obj C=% 2d  matlab=% 2d  mathematica=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^:-\s+}) {      # gotta be mercury
 | |
|             $mercury_points = 1000;
 | |
|             last;
 | |
|         } elsif (m{\w+\s*=\s*\[}) {      # matrix assignment, very matlab
 | |
|             $matlab_points += 5;
 | |
|         }
 | |
|         if (m{\w+\[}) {      # function call by []
 | |
|             $mathematica_points += 2;
 | |
| printf ".m:  \\w=[   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\s*\w+\s*=\s*}) {    # definitely not MUMPS
 | |
|             --$mumps_points;
 | |
| printf ".m:  \\w=    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
 | |
|             ++$mumps_points;
 | |
| printf ".m:  \\w \\w  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\s*;}) {
 | |
|             ++$mumps_points;
 | |
| printf ".m:  ;      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         }
 | |
|         if (m{^\s*#(include|import)}) {
 | |
|             # Objective-C without a doubt
 | |
|             $objective_C_points = 1000;
 | |
|             $matlab_points      = 0;
 | |
| printf ".m: #include obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|             $has_braces         = 2;
 | |
|             last;
 | |
|         } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
 | |
|             # Objective-C without a doubt
 | |
|             $objective_C_points = 1000;
 | |
|             $matlab_points      = 0;
 | |
| printf ".m: keyword obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|             last;
 | |
|         } elsif (m{^\s*BeginPackage}) {
 | |
|             $mathematica_points += 2;
 | |
|         } elsif (m{^\s*\[}) {             #   line starts with [  -- very matlab
 | |
|             $matlab_points += 5;
 | |
| printf ".m:  [      obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\sK(ill)?\s+}) {
 | |
|             $mumps_points  += 5;
 | |
| printf ".m:  Kill   obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\s*function}) {
 | |
|             --$objective_C_points;
 | |
|             ++$matlab_points;
 | |
| printf ".m:  funct  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         } elsif (m{^\s*%}) {              #   %
 | |
|             # matlab commented line
 | |
|             --$objective_C_points;
 | |
|             ++$matlab_points;
 | |
| printf ".m:  pcent  obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| printf "END LOOP    obj C=% 2d  matlab=% 2d  mumps=% 2d  mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
 | |
| 
 | |
|     # next heuristic is unreliable for small files
 | |
| #   $objective_C_points = -9.9e20 unless $has_braces >= 2;
 | |
| 
 | |
|     my %points = ( 'MATLAB'      => $matlab_points     ,
 | |
|                    'Mathematica' => $mathematica_points     ,
 | |
|                    'MUMPS'       => $mumps_points      ,
 | |
|                    'Objective-C' => $objective_C_points,
 | |
|                    'Mercury'     => $mercury_points    , );
 | |
| 
 | |
|     ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | |
| 
 | |
|     print "<- matlab_or_objective_C($file: matlab=$matlab_points, mathematica=$mathematica_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
| } # 1}}}
 | |
| sub php_pascal_or_fortran {                  # {{{1
 | |
|     # Decide if code is Fortran, PHP, or Pascal
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rs_language , # out
 | |
|        ) = @_;
 | |
|     print "-> php_pascal_or_fortran\n" if $opt_v > 2;
 | |
|     # fortran markers:
 | |
|     #   'implicit none' or 'implicit real' or 'implicit integer'
 | |
|     #   'program' or 'subroutine' or 'function' or 'module'
 | |
|     #   'end program' or 'end subroutine' or 'end function' or 'end module'
 | |
|     #   'write(', 'enddo'
 | |
|     #
 | |
|     # PHP:
 | |
|     #   must have at least two brace characters, { }
 | |
|     #   has /* ... */ style comments
 | |
|     #   some lines start with @
 | |
|     #   some lines start with #include
 | |
|     #
 | |
|     # Pascal:
 | |
|     #   lines ending with ;
 | |
|     #   writeln
 | |
| 
 | |
|     ${$rs_language} = "";
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     my $DEBUG           = 0;
 | |
| 
 | |
|     my $fortran_90      = 0;
 | |
|     my $fortran_points  = 0;
 | |
|     my $php_points      = 0;
 | |
|     my $pascal_points   = 0;
 | |
|     while (<$IN>) {
 | |
|         if (/^\s*<\?php/i) {
 | |
|             $php_points += 100;
 | |
|             last;
 | |
|         } elsif (/^\s*end\./i ) {
 | |
|             $pascal_points += 100;
 | |
|             last;
 | |
|         } elsif (/^\s*implicit\s+(none|real|integer|complex|double)/i) {
 | |
|             $fortran_points += 100;
 | |
|             # don't end here; try to get 77 or 90 flavor
 | |
|         } elsif (/^\s*end;/i ) {
 | |
|             ++$pascal_points;
 | |
|             last;
 | |
|         } elsif (/^\s*program\b/i) {
 | |
|             ++$fortran_points;
 | |
|             ++$pascal_points;
 | |
|         # 'function' is common to all three languages so ignore it
 | |
|         } elsif (/^\s*(subroutine|module|stop)\b/i or
 | |
|                  /^\s*write\s*\(/i                 or
 | |
|                  /^\s*end\s+(program|subroutine)/i or
 | |
|                  /^\s*end\s*(if|do)\b/i) {
 | |
|             ++$fortran_points;
 | |
|         } elsif (/^\s*(writeln|begin|procedure|interface|implementation|const|var)\b/i) {
 | |
|             ++$pascal_points;
 | |
|         } elsif (/^\s*(<|\?>)/) {
 | |
|             ++$php_points;
 | |
|         } elsif (/;\s*$/) {
 | |
|             ++$pascal_points;
 | |
|             ++$php_points;
 | |
|         }
 | |
|         if (/^\s*(integer|real|complex|double|character|logical|public|private).*?::/i or
 | |
|             /^\s*(allocatable|dimension)/i or
 | |
|             /^\s*(end\s+)?(interface|type|function|module)\b/i) {
 | |
|             ++$fortran_90;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     my %points = ( 'Fortran'      => $fortran_points     ,
 | |
|                    'PHP'          => $php_points         ,
 | |
|                    'Pascal'       => $pascal_points      , );
 | |
| 
 | |
|     ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | |
|     if (${$rs_language} eq 'Fortran') {
 | |
|         if ($fortran_90) {
 | |
|             ${$rs_language} = 'Fortran 90';
 | |
|         } else {
 | |
|             ${$rs_language} = 'Fortran 77';
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- php_pascal_or_fortran($file: fortran=$fortran_points, php=$php_points, pascal=$pascal_points) => ${$rs_language}\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
| } # 1}}}
 | |
| sub Lisp_or_OpenCL {                         # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $lisp_points   = 0;
 | |
|     my $opcl_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$lisp_points if  /^\s*;/;
 | |
|         ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
 | |
|         ++$opcl_points if  /^\s*(int|float|const|{)/;
 | |
|     }
 | |
|     $IN->close;
 | |
|     # print "lisp_points=$lisp_points   opcl_points=$opcl_points\n";
 | |
|     if ($lisp_points > $opcl_points) {
 | |
|         $lang = "Lisp";
 | |
|     } else {
 | |
|         $lang = "OpenCL";
 | |
|     }
 | |
| 
 | |
|     print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Lisp_or_Julia {                          # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Lisp_or_Julia\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $lisp_points   = 0;
 | |
|     my $julia_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$lisp_points if  /^\s*;/;
 | |
|         ++$lisp_points if  /\((def|eval|require|export|let|loop|dec|format)/;
 | |
|         ++$julia_points if  /^\s*(function|end|println|for|while)/;
 | |
|     }
 | |
|     $IN->close;
 | |
|     # print "lisp_points=$lisp_points   julia_points=$julia_points\n";
 | |
|     if ($lisp_points > $julia_points) {
 | |
|         $lang = "Lisp";
 | |
|     } else {
 | |
|         $lang = "Julia";
 | |
|     }
 | |
| 
 | |
|     print "<- Lisp_or_Julia\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Perl_or_Prolog {                         # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Perl_or_Prolog\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $perl_points = 0;
 | |
|     my $prolog_points = 0;
 | |
|     while (<$IN>) {
 | |
|         next if /^\s*$/;
 | |
|         if ($. == 1 and /^#!.*?\bperl/) {
 | |
|             $perl_points = 100;
 | |
|             last;
 | |
|         }
 | |
|         ++$perl_points   if  /^=(head|over|item|cut)/;
 | |
|         ++$perl_points   if  /;\s*$/;
 | |
|         ++$perl_points   if  /(\{|\})/;
 | |
|         ++$perl_points   if  /^\s*sub\s+/;
 | |
|         ++$perl_points   if  /\s*<<'/;  # start HERE block
 | |
|         ++$perl_points   if  /\$(\w+\->|[_!])/;
 | |
|         ++$prolog_points if !/\s*#/ and /\.\s*$/;
 | |
|         ++$prolog_points if  /:-/;
 | |
|     }
 | |
|     $IN->close;
 | |
|     # print "perl_points=$perl_points   prolog_points=$prolog_points\n";
 | |
|     if ($perl_points > $prolog_points) {
 | |
|         $lang = "Perl";
 | |
|     } else {
 | |
|         $lang = "Prolog";
 | |
|     }
 | |
| 
 | |
|     printf "<- Perl_or_Prolog(%s, Perl=%d Prolog=%d)\n",
 | |
|         $file, $perl_points, $prolog_points if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Raku_or_Prolog {                         # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Raku_or_Prolog\n" if $opt_v > 2;
 | |
|     my $lang = Perl_or_Prolog($file, $rh_Err, $raa_errors);
 | |
|     $lang = "Raku" if $lang eq "Perl";
 | |
| 
 | |
|     print "<- Raku_or_Prolog\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub IDL_or_QtProject {                       # {{{1
 | |
|     # IDL, QtProject, Prolog, or ProGuard
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $idl_points      = 0;
 | |
|     my $qtproj_points   = 0;
 | |
|     my $prolog_points   = 0;
 | |
|     my $proguard_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$idl_points      if /^\s*;/;
 | |
|         ++$idl_points      if /plot\(/i;
 | |
|         ++$qtproj_points   if /^\s*(qt|configs|sources|template|target|targetpath|subdirs)\b/i;
 | |
|         ++$qtproj_points   if /qthavemodule/i;
 | |
|         ++$prolog_points   if /\.\s*$/;
 | |
|         ++$prolog_points   if /:-/;
 | |
|         ++$proguard_points if /^\s*#/;
 | |
|         ++$proguard_points if /^-keep/;
 | |
|         ++$proguard_points if /^-(dont)?obfuscate/;
 | |
|     }
 | |
|     $IN->close;
 | |
|     # print "idl_points=$idl_points   qtproj_points=$qtproj_points\n";
 | |
| 
 | |
|     my %points = ( 'IDL'        => $idl_points       ,
 | |
|                    'Qt Project' => $qtproj_points    ,
 | |
|                    'Prolog'     => $prolog_points    ,
 | |
|                    'ProGuard'   => $proguard_points  ,
 | |
|                  );
 | |
| 
 | |
|     $lang = (sort { $points{$b} <=> $points{$a} or $a cmp $b} keys %points)[0];
 | |
| 
 | |
|     print "<- IDL_or_QtProject(idl_points=$idl_points, ",
 | |
|           "qtproj_points=$qtproj_points, prolog_points=$prolog_points, ",
 | |
|           "proguard_points=$proguard_points)\n"
 | |
|            if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Ant_or_XML {                             # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Ant_or_XML($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = "XML";
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $Ant_points   = 0;
 | |
|     my $XML_points   = 1;
 | |
|     while (<$IN>) {
 | |
|         if (/^\s*<project\s+/) {
 | |
|             ++$Ant_points  ;
 | |
|             --$XML_points  ;
 | |
|         }
 | |
|         if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
 | |
|             ++$Ant_points  ;
 | |
|             --$XML_points  ;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     if ($XML_points >= $Ant_points) {
 | |
|         # tie or better goes to XML
 | |
|         $lang = "XML";
 | |
|     } else {
 | |
|         $lang = "Ant";
 | |
|     }
 | |
| 
 | |
|     print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Maven_or_XML {                           # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Maven_or_XML($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = "XML";
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $Mvn_points   = 0;
 | |
|     my $XML_points   = 1;
 | |
|     while (<$IN>) {
 | |
|         if (/^\s*<project\s+/) {
 | |
|             ++$Mvn_points  ;
 | |
|             --$XML_points  ;
 | |
|         }
 | |
|         if (m{xmlns="http://maven.apache.org/POM/}) {
 | |
|             ++$Mvn_points  ;
 | |
|             --$XML_points  ;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     if ($XML_points >= $Mvn_points) {
 | |
|         # tie or better goes to XML
 | |
|         $lang = "XML";
 | |
|     } else {
 | |
|         $lang = "Maven";
 | |
|     }
 | |
| 
 | |
|     print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub pascal_or_puppet {                       # {{{1
 | |
|     # Decide if code is Pascal or Puppet manifest
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rs_language , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> pascal_or_puppet\n" if $opt_v > 2;
 | |
| 
 | |
|     ${$rs_language} = "";
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     my $DEBUG              = 0;
 | |
|     my $pascal_points      = 0;
 | |
|     my $puppet_points      = 0;
 | |
| 
 | |
|     while (<$IN>) {
 | |
| 
 | |
|         if ( /^\s*\#\s+/ ) {
 | |
|                 $puppet_points += .001;
 | |
|                 next;
 | |
|         }
 | |
| 
 | |
|         ++$pascal_points if /\bprogram\s+[A-Za-z]/i;
 | |
|         ++$pascal_points if /\bunit\s+[A-Za-z]/i;
 | |
|         ++$pascal_points if /\bmodule\s+[A-Za-z]/i;
 | |
|         ++$pascal_points if /\bprocedure\b/i;
 | |
|         ++$pascal_points if /\bfunction\b/i;
 | |
|         ++$pascal_points if /^\s*interface\s+/i;
 | |
|         ++$pascal_points if /^\s*implementation\s+/i;
 | |
|         ++$pascal_points if /^\s*uses\s+/i;
 | |
|         ++$pascal_points if /(?<!\:\:)\bbegin\b(?!\:\:)/i;
 | |
|         ++$pascal_points if /(?<!\:\:)\bend\b(?!\:\:)/i;
 | |
|         ++$pascal_points if /\:\=/;
 | |
|         ++$pascal_points if /\<\>/;
 | |
|         ++$pascal_points if /^\s*\{\$(I|INCLUDE)\s+.*\}/i;
 | |
|         ++$pascal_points if /writeln/;
 | |
| 
 | |
|         ++$puppet_points if /^\s*class\s+/ and not /class\s+operator\s+/;
 | |
|         ++$puppet_points if /^\s*function\s+[a-z][a-z0-9]+::[a-z][a-z0-9]+\s*/;
 | |
|         ++$puppet_points if /^\s*type\s+[A-Z]\w+::[A-Z]\w+\s+/;
 | |
|         ++$puppet_points if /^\s*case\s+/;
 | |
|         ++$puppet_points if /^\s*package\s+/;
 | |
|         ++$puppet_points if /^\s*file\s+/;
 | |
|         ++$puppet_points if /^\s*include\s\w+/;
 | |
|         ++$puppet_points if /^\s*service\s+/;
 | |
|         ++$puppet_points if /\s\$\w+\s*\=\s*\S/;
 | |
|         ++$puppet_points if /\S\s*\=\>\s*\S/;
 | |
| 
 | |
|         # No need to process rest of file if language seems obvious.
 | |
|         last
 | |
|                 if (abs ($pascal_points - $puppet_points ) > 20 );
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
 | |
|         if $opt_v > 2;
 | |
| 
 | |
|     if ($pascal_points > $puppet_points) {
 | |
|         ${$rs_language} = "Pascal";
 | |
|     } else {
 | |
|         ${$rs_language} = "Puppet";
 | |
|     }
 | |
| 
 | |
| } # 1}}}
 | |
| sub Forth_or_Fortran {                       # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Forth_or_Fortran\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $forth_points = 0;
 | |
|     my $fortran_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$forth_points if  /^:\s/;
 | |
|         ++$fortran_points if  /^([c*][^a-z]|\s{6,}(subroutine|program|end|implicit)\s|\s*!)/i;
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($forth_points > $fortran_points) {
 | |
|         $lang = "Forth";
 | |
|     } else {
 | |
|         $lang = "Fortran 77";
 | |
|     }
 | |
| 
 | |
|     print "<- Forth_or_Fortran\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Forth_or_Fsharp {                        # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Forth_or_Fsharp\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $forth_points = 0;
 | |
|     my $fsharp_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$forth_points if  /^:\s/;
 | |
|         ++$fsharp_points if  /^\s*(#light|import|let|module|namespace|open|type)/;
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($forth_points > $fsharp_points) {
 | |
|         $lang = "Forth";
 | |
|     } else {
 | |
|         $lang = "F#";
 | |
|     }
 | |
| 
 | |
|     print "<- Forth_or_Fsharp\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Verilog_or_Coq {                         # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Verilog_or_Coq\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $coq_points     = 0;
 | |
|     my $verilog_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$verilog_points if  /^\s*(module|begin|input|output|always)/;
 | |
|         ++$coq_points if /\b(Inductive|Fixpoint|Definition|
 | |
|                              Theorem|Lemma|Proof|Qed|forall|
 | |
|                              Section|Check|Notation|Variable|
 | |
|                              Goal|Fail|Require|Scheme|Module|Ltac|
 | |
|                              Set|Unset|Parameter|Coercion|Axiom|
 | |
|                              Locate|Type|Record|Existing|Class)\b/x;
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($coq_points > $verilog_points) {
 | |
|         $lang = "Coq";
 | |
|     } else {
 | |
|         $lang = "Verilog-SystemVerilog";
 | |
|     }
 | |
| 
 | |
|     print "<- Verilog_or_Coq\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub TypeScript_or_QtLinguist {               # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> TypeScript_or_QtLinguist\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $tscript_points  = 0;
 | |
|     my $linguist_points = 0;
 | |
|     while (<$IN>) {
 | |
|         ++$linguist_points if m{\b</?(message|source|translation)>};
 | |
|         ++$tscript_points  if /^\s*(var|const|let|class|document)\b/;
 | |
|         ++$tscript_points  if /[;}]\s*$/;
 | |
|         ++$tscript_points  if m{^\s*//};
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($tscript_points >= $linguist_points) {
 | |
|         $lang = "TypeScript";
 | |
|     } else {
 | |
|         $lang = "Qt Linguist";
 | |
|     }
 | |
|     print "<- TypeScript_or_QtLinguist\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Qt_or_Glade {                            # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Qt_or_Glade\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $qt_points    =  1;
 | |
|     my $glade_points = -1;
 | |
|     while (<$IN>) {
 | |
|         if (/generated\s+with\s+glade/i) {
 | |
|             $glade_points =  1;
 | |
|             $qt_points    = -1;
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($glade_points > $qt_points) {
 | |
|         $lang = "Glade";
 | |
|     } else {
 | |
|         $lang = "Qt";
 | |
|     }
 | |
|     print "<- Qt_or_Glade\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Csharp_or_Smalltalk {                    # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Csharp_or_Smalltalk($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $cs_points        = 0;
 | |
|     my $smalltalk_points = 0;
 | |
|     while (<$IN>) {
 | |
|         s{//.*?$}{};        # strip inline C# comments for better clarity
 | |
|         next if /^\s*$/;
 | |
|         if (/[;}{]\s*$/) {
 | |
|             ++$cs_points       ;
 | |
|         } elsif (/^(using|namespace)\s/) {
 | |
|             $cs_points += 20;
 | |
|         } elsif (/^\s*(public|private|new)\s/) {
 | |
|             $cs_points += 20;
 | |
|         } elsif (/^\s*\[assembly:/) {
 | |
|             ++$cs_points       ;
 | |
|         }
 | |
|         if (/(\!|\]\.)\s*$/) {
 | |
|             ++$smalltalk_points;
 | |
|             --$cs_points       ;
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
|     if ($smalltalk_points > $cs_points) {
 | |
|         $lang = "Smalltalk";
 | |
|     } else {
 | |
|         $lang = "C#";
 | |
|     }
 | |
|     print "<- Csharp_or_Smalltalk($file)=$lang\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Visual_Basic_or_TeX_or_Apex {            # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|         $rs_language , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Visual_Basic_or_TeX_or_Apex($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $VB_points        = 0;
 | |
|     my $tex_points       = 0;
 | |
|     my $apex_points      = 0;
 | |
|     while (<$IN>) {
 | |
|         next if /^\s*$/;
 | |
| #print "$_";
 | |
|         if (/\s*%/ or /\s*\\/) {
 | |
|             ++$tex_points   ;
 | |
|         } else {
 | |
|             if (/^\s*(public|private)\s/i) {
 | |
|                 ++$VB_points    ;
 | |
|                 ++$apex_points  ;
 | |
| #print "+VB1 +A1";
 | |
|             } elsif (/^\s*(end|attribute|version)\s/i) {
 | |
|                 ++$VB_points    ;
 | |
| #print "+VB2";
 | |
|             }
 | |
|             if (/[{}]/ or /;\s*$/) {
 | |
|                 ++$apex_points  ;
 | |
| #print "+A2";
 | |
|             }
 | |
| #print "\n";
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     my %points = ( 'Visual Basic'   => $VB_points   ,
 | |
|                    'TeX'            => $tex_points  ,
 | |
|                    'Apex Class'     => $apex_points ,);
 | |
| 
 | |
|     ${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
 | |
| 
 | |
|     print "<- Visual_Basic_or_TeX_or_Apex($file: VB=$VB_points, TeX=$tex_points, Apex=$apex_points\n" if $opt_v > 2;
 | |
|     return $lang;
 | |
| } # 1}}}
 | |
| sub Scheme_or_SaltStack {                    # {{{1
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> Scheme_or_SaltStack($file)\n" if $opt_v > 2;
 | |
| 
 | |
|     my $lang = undef;
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
 | |
|         return $lang;
 | |
|     }
 | |
|     my $Sch_points = 0;
 | |
|     my $SS_points  = 0;
 | |
|     while (<$IN>) {
 | |
|         next if /^\s*$/;
 | |
|         if (/{\%.*%}/) {
 | |
|             $SS_points += 5;
 | |
|         } elsif (/map\.jinja\b/) {
 | |
|             $SS_points += 5;
 | |
|         } elsif (/\((define|lambda|let|cond|do)\s/) {
 | |
|             $Sch_points += 1;
 | |
|         } else {
 | |
|         }
 | |
|     }
 | |
|     $IN->close;
 | |
| 
 | |
|     print "<- Scheme_or_SaltStack($file: Scheme=$Sch_points, SaltStack=$SS_points\n" if $opt_v > 2;
 | |
|     if ($Sch_points > $SS_points) {
 | |
|         return "Scheme";
 | |
|     } else {
 | |
|         return "SaltStack";
 | |
|     }
 | |
| } # 1}}}
 | |
| sub html_colored_text {                      # {{{1
 | |
|     # http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
 | |
|     my ($color, $text) = @_;
 | |
| #?#die "html_colored_text($text)";
 | |
|     if      ($color =~ /^red$/i)   {
 | |
|         $color = "#ff0000";
 | |
|     } elsif ($color =~ /^green$/i) {
 | |
|         $color = "#00ff00";
 | |
|     } elsif ($color =~ /^blue$/i)  {
 | |
|         $color = "#0000ff";
 | |
|     } elsif ($color =~ /^grey$/i)  {
 | |
|         $color = "#cccccc";
 | |
|     }
 | |
| #   return "" unless $text;
 | |
|     return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
 | |
| } # 1}}}
 | |
| sub xml_metachars {                          # {{{1
 | |
|     # http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
 | |
|     my ($string, ) = shift @_;
 | |
| 
 | |
|     my  @in_chars    = split(//, $string);
 | |
|     my  @out_chars   = ();
 | |
|     foreach my $c (@in_chars) {
 | |
|         if      ($c eq '&') { push @out_chars, '&'
 | |
|         } elsif ($c eq '<') { push @out_chars, '<'
 | |
|         } elsif ($c eq '>') { push @out_chars, '>'
 | |
|         } elsif ($c eq '"') { push @out_chars, '"'
 | |
|         } elsif ($c eq "'") { push @out_chars, '''
 | |
|         } else {
 | |
|             push @out_chars, $c;
 | |
|         }
 | |
|     }
 | |
|     return join "", @out_chars;
 | |
| } # 1}}}
 | |
| sub html_metachars {                         # {{{1
 | |
|     # Replace HTML metacharacters with their printable forms.
 | |
|     # Future:  use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
 | |
|     # from Fabiano Reese Righetti's HTML::Encoder module if
 | |
|     # this subroutine proves to be too simplistic.
 | |
|     my ($string, ) = shift @_;
 | |
| 
 | |
|     my  @in_chars    = split(//, $string);
 | |
|     my  @out_chars   = ();
 | |
|     foreach my $c (@in_chars) {
 | |
|         if      ($c eq '<') {
 | |
|             push @out_chars, '<'
 | |
|         } elsif ($c eq '>') {
 | |
|             push @out_chars, '>'
 | |
|         } elsif ($c eq '&') {
 | |
|             push @out_chars, '&'
 | |
|         } else {
 | |
|             push @out_chars, $c;
 | |
|         }
 | |
|     }
 | |
|     return join "", @out_chars;
 | |
| } # 1}}}
 | |
| sub test_alg_diff {                          # {{{1
 | |
|     my ($file_1 ,
 | |
|         $file_2 )
 | |
|        = @_;
 | |
|     my $fh_1 = open_file('<', $file_1, 1);
 | |
|     die "Unable to read $file_1:  $!\n" unless defined $fh_1;
 | |
|     chomp(my @lines_1 = <$fh_1>);
 | |
|     $fh_1->close;
 | |
| 
 | |
|     my $fh_2 = open_file('<', $file_2, 1);
 | |
|     die "Unable to read $file_2:  $!\n" unless defined $fh_2;
 | |
|     chomp(my @lines_2 = <$fh_2>);
 | |
|     $fh_2->close;
 | |
| 
 | |
|     my $n_no_change = 0;
 | |
|     my $n_modified  = 0;
 | |
|     my $n_added     = 0;
 | |
|     my $n_deleted   = 0;
 | |
|     my @min_sdiff   = ();
 | |
| my $NN = chr(27) . "[0m";  # normal
 | |
| my $BB = chr(27) . "[1m";  # bold
 | |
| 
 | |
|     my @sdiffs = sdiff( \@lines_1, \@lines_2 );
 | |
|     foreach my $entry (@sdiffs) {
 | |
|         my ($out_1, $out_2) = ('', '');
 | |
|         if ($entry->[0] eq 'u') {
 | |
|             ++$n_no_change;
 | |
|           # $out_1 = $entry->[1];
 | |
|           # $out_2 = $entry->[2];
 | |
|             next;
 | |
|         }
 | |
| #       push @min_sdiff, $entry;
 | |
|         if      ($entry->[0] eq 'c') {
 | |
|             ++$n_modified;
 | |
|             ($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
 | |
|             $out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
 | |
|             $out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
 | |
|           # $out_1 =~ s/\cA//g;
 | |
|           # $out_2 =~ s/\cA//g;
 | |
|         } elsif ($entry->[0] eq '+') {
 | |
|             ++$n_added;
 | |
|             $out_1 = $entry->[1];
 | |
|             $out_2 = $entry->[2];
 | |
|         } elsif ($entry->[0] eq '-') {
 | |
|             ++$n_deleted;
 | |
|             $out_1 = $entry->[1];
 | |
|             $out_2 = $entry->[2];
 | |
|         } elsif ($entry->[0] eq 'u') {
 | |
|         } else { die "unknown entry->[0]=[$entry->[0]]\n"; }
 | |
|         printf "%-80s | %s\n", $out_1, $out_2;
 | |
|     }
 | |
| 
 | |
| #   foreach my $entry (@min_sdiff) {
 | |
| #       printf "DIFF:  %s  %s  %s\n", @{$entry};
 | |
| #   }
 | |
| } # 1}}}
 | |
| sub write_comments_to_html {                 # {{{1
 | |
|     my ($filename      , # in
 | |
|         $rah_diff_L    , # in  see routine array_diff() for explanation
 | |
|         $rah_diff_R    , # in  see routine array_diff() for explanation
 | |
|         $rh_blank      , # in  location and counts of blank lines
 | |
|        ) = @_;
 | |
| 
 | |
|     print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
 | |
|     my $file = $filename . ".html";
 | |
| 
 | |
|     my $approx_line_count = scalar @{$rah_diff_L};
 | |
|        $approx_line_count = 1 unless $approx_line_count;
 | |
|     my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
 | |
| 
 | |
|     my $html_out = html_header($filename);
 | |
| 
 | |
|     my $comment_line_number = 0;
 | |
|     for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
 | |
|         if (defined $rh_blank->{$i}) {
 | |
|             foreach (1..$rh_blank->{$i}) {
 | |
|                 $html_out .= "<!-- blank -->\n";
 | |
|             }
 | |
|         }
 | |
|         my $line_num = "";
 | |
|         my $pre      = "";
 | |
|         my $post     = '</span>  ';
 | |
| warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
 | |
|         if ($rah_diff_R->[$i]{type} eq 'nonexist') {
 | |
|             ++$comment_line_number;
 | |
|             $line_num = sprintf "\  <span class=\"clinenum\"> %0${n_digits}d %s",
 | |
|                             $comment_line_number, $post;
 | |
|             $pre = '<span class="comment">';
 | |
|             $html_out .= $line_num;
 | |
|             $html_out .= $pre .
 | |
|                          html_metachars($rah_diff_L->[$i]{char}) .
 | |
|                          $post . "\n";
 | |
|             next;
 | |
|         }
 | |
|         if      ($rah_diff_R->[$i]{type} eq 'code' and
 | |
|                  $rah_diff_R->[$i]{desc} eq 'same') {
 | |
|             # entire line remains as-is
 | |
|             $line_num = sprintf "\  <span class=\"linenum\"> %0${n_digits}d %s",
 | |
|                             $rah_diff_R->[$i]{lnum}, $post;
 | |
|             $pre    = '<span class="normal">';
 | |
|             $html_out .= $line_num;
 | |
|             $html_out .= $pre .
 | |
|                          html_metachars($rah_diff_R->[$i]{char}) . $post;
 | |
| #XX     } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
 | |
| #XX
 | |
| #XX         $line_num = '<span class="linenum">' .
 | |
| #XX                      $rah_diff_R->[$i]{lnum} . $post;
 | |
| #XX         $html_out .= $line_num;
 | |
| #XX
 | |
| #XX         my @strings = @{$rah_diff_R->[$i]{char}{strings}};
 | |
| #XX         my @type    = @{$rah_diff_R->[$i]{char}{type}};
 | |
| #XX         for (my $i = 0; $i < scalar @strings; $i++) {
 | |
| #XX             if ($type[$i] eq 'u') {
 | |
| #XX                 $pre = '<span class="normal">';
 | |
| #XX             } else {
 | |
| #XX                 $pre = '<span class="comment">';
 | |
| #XX             }
 | |
| #XX             $html_out .= $pre .  html_metachars($strings[$i]) . $post;
 | |
| #XX         }
 | |
| # print Dumper(@strings, @type); die;
 | |
| 
 | |
|         } elsif ($rah_diff_R->[$i]{type} eq 'comment') {
 | |
|             $line_num = '<span class="clinenum">' . $comment_line_number . $post;
 | |
|             # entire line is a comment
 | |
|             $pre    = '<span class="comment">';
 | |
|             $html_out .= $pre .
 | |
|                          html_metachars($rah_diff_R->[$i]{char}) . $post;
 | |
|         }
 | |
| #printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
 | |
|         $html_out .= "\n";
 | |
|     }
 | |
| 
 | |
|     $html_out .= html_end();
 | |
| 
 | |
|     my $out_file = "$filename.html";
 | |
|     write_file($out_file, {}, ( $html_out ) );
 | |
| 
 | |
|     print "<- write_comments_to_html\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub array_diff {                             # {{{1
 | |
|     my ($file          , # in  only used for error reporting
 | |
|         $ra_lines_L    , # in  array of lines in Left  file (no blank lines)
 | |
|         $ra_lines_R    , # in  array of lines in Right file (no blank lines)
 | |
|         $mode          , # in  "comment" | "revision"
 | |
|         $rah_diff_L    , # out
 | |
|         $rah_diff_R    , # out
 | |
|         $raa_Errors    , # in/out
 | |
|        ) = @_;
 | |
| 
 | |
|     # This routine operates in two ways:
 | |
|     # A. Computes diffs of the same file with and without comments.
 | |
|     #    This is used to classify lines as code, comments, or blank.
 | |
|     # B. Computes diffs of two revisions of a file.  This method
 | |
|     #    requires a prior run of method A using the older version
 | |
|     #    of the file because it needs lines to be classified.
 | |
| 
 | |
|     # $rah_diff structure:
 | |
|     # An array with n entries where n equals the number of lines in
 | |
|     # an sdiff of the two files.  Each entry in the array describes
 | |
|     # the contents of the corresponding line in file Left and file Right:
 | |
|     #  diff[]{type} = blank | code | code+comment | comment | nonexist
 | |
|     #        {lnum} = line number within the original file (1-based)
 | |
|     #        {desc} = same | added | removed | modified
 | |
|     #        {char} = the input line unless {desc} = 'modified' in
 | |
|     #                 which case
 | |
|     #        {char}{strings} = [ substrings ]
 | |
|     #        {char}{type}    = [ disposition (added, removed, etc)]
 | |
|     #
 | |
| 
 | |
|     @{$rah_diff_L} = ();
 | |
|     @{$rah_diff_R} = ();
 | |
| 
 | |
|     print "-> array_diff()\n" if $opt_v > 2;
 | |
|     my $COMMENT_MODE = 0;
 | |
|        $COMMENT_MODE = 1 if $mode eq "comment";
 | |
| 
 | |
| #print "array_diff(mode=$mode)\n";
 | |
| #print Dumper("block left:" , $ra_lines_L);
 | |
| #print Dumper("block right:", $ra_lines_R);
 | |
| 
 | |
|     my @sdiffs = ();
 | |
|     eval {
 | |
|         local $SIG{ALRM} = sub { die "alarm\n" };
 | |
|         alarm $opt_diff_timeout;
 | |
|         @sdiffs = sdiff($ra_lines_L, $ra_lines_R);
 | |
|         alarm 0;
 | |
|     };
 | |
|     if ($@) {
 | |
|         # timed out
 | |
|         die unless $@ eq "alarm\n"; # propagate unexpected errors
 | |
|         push @{$raa_Errors},
 | |
|              [ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
 | |
|         if ($opt_v) {
 | |
|           warn "array_diff: diff timeout failure for $file--ignoring\n";
 | |
|         }
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     my $n_L        = 0;
 | |
|     my $n_R        = 0;
 | |
|     my $n_sdiff    = 0;  # index to $rah_diff_L, $rah_diff_R
 | |
|     foreach my $triple (@sdiffs) {
 | |
|         my $flag   = $triple->[0];
 | |
|         my $line_L = $triple->[1];
 | |
|         my $line_R = $triple->[2];
 | |
|         $rah_diff_L->[$n_sdiff]{char} = $line_L;
 | |
|         $rah_diff_R->[$n_sdiff]{char} = $line_R;
 | |
|         if      ($flag eq 'u') {  # u = unchanged
 | |
|             ++$n_L;
 | |
|             ++$n_R;
 | |
|             if ($COMMENT_MODE) {
 | |
|                 # line exists in both with & without comments, must be code
 | |
|                 $rah_diff_L->[$n_sdiff]{type} = "code";
 | |
|                 $rah_diff_R->[$n_sdiff]{type} = "code";
 | |
|             }
 | |
|             $rah_diff_L->[$n_sdiff]{desc} = "same";
 | |
|             $rah_diff_R->[$n_sdiff]{desc} = "same";
 | |
|             $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | |
|             $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | |
|         } elsif ($flag eq 'c') {  # c = changed
 | |
| # warn "per line sdiff() commented out\n"; if (0) {
 | |
|             ++$n_L;
 | |
|             ++$n_R;
 | |
| 
 | |
|             if ($COMMENT_MODE) {
 | |
|                 # line has text both with & without comments;
 | |
|                 # count as code
 | |
|                 $rah_diff_L->[$n_sdiff]{type} = "code";
 | |
|                 $rah_diff_R->[$n_sdiff]{type} = "code";
 | |
|             }
 | |
| 
 | |
|             my @chars_L = split '', $line_L;
 | |
|             my @chars_R = split '', $line_R;
 | |
| 
 | |
|             $rah_diff_L->[$n_sdiff]{desc} = "modified";
 | |
|             $rah_diff_R->[$n_sdiff]{desc} = "modified";
 | |
|             $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | |
|             $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | |
| 
 | |
|         } elsif ($flag eq '+') {  # + = added
 | |
|             ++$n_R;
 | |
|             if ($COMMENT_MODE) {
 | |
|                 # should never get here, but may due to sdiff() bug,
 | |
|                 # ref https://rt.cpan.org/Public/Bug/Display.html?id=131629
 | |
|                 # Rather than failing, ignore and continue.  A possible
 | |
|                 # consequence is counts may be inconsistent.
 | |
| #####           @{$rah_diff_L} = ();
 | |
| #####           @{$rah_diff_R} = ();
 | |
| #####           push @{$raa_Errors},
 | |
| #####                [ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
 | |
|                 if ($opt_v) {
 | |
|                   warn "array_diff: diff failure (diff says the\n";
 | |
|                   warn "comment-free file has added lines).\n";
 | |
|                   warn "$n_sdiff  $line_L\n";
 | |
|                 }
 | |
|             }
 | |
| ####        $rah_diff_L->[$n_sdiff]{type} = "nonexist";
 | |
|             $rah_diff_L->[$n_sdiff]{type} = "comment";
 | |
|             $rah_diff_L->[$n_sdiff]{desc} = "removed";
 | |
|             $rah_diff_R->[$n_sdiff]{desc} = "added";
 | |
|             $rah_diff_R->[$n_sdiff]{lnum} = $n_R;
 | |
|         } elsif ($flag eq '-') {  # - = removed
 | |
|             ++$n_L;
 | |
|             if ($COMMENT_MODE) {
 | |
|                 # line must be comment because blanks already gone
 | |
|                 $rah_diff_L->[$n_sdiff]{type} = "comment";
 | |
|             }
 | |
|             $rah_diff_R->[$n_sdiff]{type} = "nonexist";
 | |
|             $rah_diff_R->[$n_sdiff]{desc} = "removed";
 | |
|             $rah_diff_L->[$n_sdiff]{desc} = "added";
 | |
|             $rah_diff_L->[$n_sdiff]{lnum} = $n_L;
 | |
|         }
 | |
| #printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
 | |
|         ++$n_sdiff;
 | |
|     }
 | |
| #use Data::Dumper::Simple;
 | |
| #print Dumper($rah_diff_L, $rah_diff_R);
 | |
| #print Dumper($rah_diff_L);
 | |
| 
 | |
|     print "<- array_diff\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub remove_leading_dir {                     # {{{1
 | |
|     my @filenames = @_;
 | |
|     #
 | |
|     #  Input should be a list of file names
 | |
|     #  with the same leading directory such as
 | |
|     #
 | |
|     #      dir1/dir2/a.txt
 | |
|     #      dir1/dir2/b.txt
 | |
|     #      dir1/dir2/dir3/c.txt
 | |
|     #
 | |
|     #  Output is the same list minus the common
 | |
|     #  directory path:
 | |
|     #
 | |
|     #      a.txt
 | |
|     #      b.txt
 | |
|     #      dir3/c.txt
 | |
|     #
 | |
|     print "-> remove_leading_dir()\n" if $opt_v > 2;
 | |
|     my @D = (); # a matrix:   [ [ dir1, dir2 ],         # dir1/dir2/a.txt
 | |
|                 #               [ dir1, dir2 ],         # dir1/dir2/b.txt
 | |
|                 #               [ dir1, dir2 , dir3] ]  # dir1/dir2/dir3/c.txt
 | |
|     if ($ON_WINDOWS) {
 | |
|         foreach my $F (@filenames) {
 | |
|             $F =~ s{\\}{/}g;
 | |
|             $F = ucfirst($F) if $F =~ /^\w:/;  # uppercase drive letter
 | |
|         }
 | |
|     }
 | |
|     if (scalar @filenames == 1) {
 | |
|         # special case:  with only one filename
 | |
|         # cannot determine a baseline, just remove first directory level
 | |
|         $filenames[0] =~ s{^.*?/}{};
 | |
|         # print "-> $filenames[0]\n";
 | |
|         return $filenames[0];
 | |
|     }
 | |
|     foreach my $F (@filenames) {
 | |
|         my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
 | |
|         my @x = File::Spec->splitdir( $Dir );
 | |
|         pop @x unless $x[$#x]; # last entry usually null, remove it
 | |
|         if ($ON_WINDOWS) {
 | |
|             if (defined($Vol) and $Vol) {
 | |
|                 # put the drive letter, eg, C:, at the front
 | |
|                 unshift @x, uc $Vol;
 | |
|             }
 | |
|         }
 | |
| #print "F=$F, Dir=$Dir  x=[", join("][", @x), "]\n";
 | |
|         push @D, [ @x ];
 | |
|     }
 | |
| 
 | |
|     # now loop over columns until either they are all
 | |
|     # eliminated or a unique column is found
 | |
| 
 | |
|     my @common   = ();  # to contain the common leading directories
 | |
|     my $mismatch = 0;
 | |
|     while (!$mismatch) {
 | |
|         for (my $row = 1; $row < scalar @D; $row++) {
 | |
| #print "comparing $D[$row][0] to $D[0][0]\n";
 | |
| 
 | |
|             if (!defined $D[$row][0] or !defined $D[0][0] or
 | |
|                 ($D[$row][0] ne $D[0][0])) {
 | |
|                 $mismatch = 1;
 | |
|                 last;
 | |
|             }
 | |
|         }
 | |
| #print "mismatch=$mismatch\n";
 | |
|         if (!$mismatch) {
 | |
|             push @common, $D[0][0];
 | |
|             # all terms in the leading match; unshift the batch
 | |
|             foreach my $ra (@D) {
 | |
|                 shift @{$ra};
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     push @common, " ";  # so that $leading will end with "/ "
 | |
|     my $leading = File::Spec->catdir( @common );
 | |
|        $leading =~ s{ $}{};  # now take back the bogus appended space
 | |
| #print "remove_leading_dir leading=[$leading]\n"; die;
 | |
|     if ($ON_WINDOWS) {
 | |
|        $leading =~ s{\\}{/}g;
 | |
|     }
 | |
|     foreach my $F (@filenames) {
 | |
|         $F =~ s{^$leading}{};
 | |
|     }
 | |
| 
 | |
|     print "<- remove_leading_dir()\n" if $opt_v > 2;
 | |
|     return @filenames;
 | |
| 
 | |
| } # 1}}}
 | |
| sub strip_leading_dir {                      # {{{1
 | |
|     my ($leading, @filenames) = @_;
 | |
|     #  removes the string $leading from each entry in @filenames
 | |
|     print "-> strip_leading_dir()\n" if $opt_v > 2;
 | |
| 
 | |
| #print "remove_leading_dir leading=[$leading]\n"; die;
 | |
|     if ($ON_WINDOWS) {
 | |
|        $leading =~ s{\\}{/}g;
 | |
|         foreach my $F (@filenames) {
 | |
|             $F =~ s{\\}{/}g;
 | |
|         }
 | |
|     }
 | |
|     foreach my $F (@filenames) {
 | |
| #print "strip_leading_dir F before $F\n";
 | |
|         if ($ON_WINDOWS) {
 | |
|             $F =~ s{^$leading}{}i;
 | |
|         } else {
 | |
|             $F =~ s{^$leading}{};
 | |
|         }
 | |
| #print "strip_leading_dir F after  $F\n";
 | |
|     }
 | |
| 
 | |
|     print "<- strip_leading_dir()\n" if $opt_v > 2;
 | |
|     return @filenames;
 | |
| 
 | |
| } # 1}}}
 | |
| sub find_deepest_file {                      # {{{1
 | |
|     my @filenames = @_;
 | |
|     #
 | |
|     #  Input should be a list of file names
 | |
|     #  with the same leading directory such as
 | |
|     #
 | |
|     #      dir1/dir2/a.txt
 | |
|     #      dir1/dir2/b.txt
 | |
|     #      dir1/dir2/dir3/c.txt
 | |
|     #
 | |
|     #  Output is the file with the most parent directories:
 | |
|     #
 | |
|     #      dir1/dir2/dir3/c.txt
 | |
| 
 | |
|     print "-> find_deepest_file()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $deepest    = undef;
 | |
|     my $max_subdir = -1;
 | |
|     foreach my $F (sort @filenames) {
 | |
|         my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
 | |
|         my @x = File::Spec->splitdir( $Dir );
 | |
|         pop @x unless $x[$#x]; # last entry usually null, remove it
 | |
|         if (scalar @x > $max_subdir) {
 | |
|             $deepest    = $F;
 | |
|             $max_subdir = scalar @x;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- find_deepest_file()\n" if $opt_v > 2;
 | |
|     return $deepest;
 | |
| 
 | |
| } # 1}}}
 | |
| sub find_uncommon_parent_dir {               # {{{1
 | |
|     my ($file_L, $file_R) = @_;
 | |
|     #
 | |
|     # example:
 | |
|     #
 | |
|     #   file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
 | |
|     #   file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
 | |
|     #
 | |
|     # then return
 | |
|     #
 | |
|     #   "perl-5.16.1",
 | |
|     #   "/tmp/8VxQG0OLbp/perl-5.16.3",
 | |
| 
 | |
|     my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
 | |
|     my @x_L = File::Spec->splitdir( $Dir_L );
 | |
|     my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
 | |
|     my @x_R = File::Spec->splitdir( $Dir_R );
 | |
| 
 | |
|     my @common  = ();
 | |
| 
 | |
|     # work backwards
 | |
|     while ($x_L[$#x_L] eq $x_R[$#x_R]) {
 | |
|         push @common, $x_L[$#x_L];
 | |
|         pop  @x_L;
 | |
|         pop  @x_R;
 | |
|     }
 | |
|     my $success = scalar @common;
 | |
| 
 | |
|     my $dirs_L = File::Spec->catdir( @x_L );
 | |
|     my $dirs_R = File::Spec->catdir( @x_R );
 | |
|     my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
 | |
|     my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
 | |
| 
 | |
|     return $lead_L, $lead_R, $success;
 | |
| 
 | |
| } # 1}}}
 | |
| sub get_leading_dirs {                       # {{{1
 | |
|     my ($rh_file_list_L, $rh_file_list_R) = @_;
 | |
|     # find uniquely named files in both sets to help determine the
 | |
|     # leading directory positions
 | |
|     my %unique_filename = ();
 | |
|     my %basename_L = ();
 | |
|     my %basename_R = ();
 | |
|     foreach my $f (keys %{$rh_file_list_L}) {
 | |
|         my $bn = basename($f);
 | |
|         $basename_L{ $bn }{'count'}   += 1;
 | |
|         $basename_L{ $bn }{'fullpath'} = $f;
 | |
|     }
 | |
|     foreach my $f (keys %{$rh_file_list_R}) {
 | |
|         my $bn = basename($f);
 | |
|         $basename_R{ $bn }{'count'}   += 1;
 | |
|         $basename_R{ $bn }{'fullpath'} = $f;
 | |
|     }
 | |
|     foreach my $f (keys %basename_L) {
 | |
|         next unless $basename_L{$f}{'count'} == 1;
 | |
|         next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
 | |
|         $unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
 | |
|         $unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
 | |
|     }
 | |
|     return undef, undef, 0 unless %unique_filename;
 | |
| 
 | |
| ####my %candidate_leading_dir_L = ();
 | |
| ####my %candidate_leading_dir_R = ();
 | |
|     my ($L_drop, $R_drop) = (undef, undef);
 | |
|     foreach my $f (keys %unique_filename) {
 | |
|         my $fL = $unique_filename{ $f }{'L'};
 | |
|         my $fR = $unique_filename{ $f }{'R'};
 | |
| 
 | |
|         my @DL = File::Spec->splitdir($fL);
 | |
|         my @DR = File::Spec->splitdir($fR);
 | |
| #printf "%-36s -> %-36s\n", $fL, $fR;
 | |
| #print Dumper(@DL, @DR);
 | |
|         # find the most number of common directories between L and R
 | |
|         if (!defined $L_drop) {
 | |
|             $L_drop = dirname $fL;
 | |
|         }
 | |
|         if (!defined $R_drop) {
 | |
|             $R_drop = dirname $fR;
 | |
|         }
 | |
|         my $n_path_elements_L = scalar @DL;
 | |
|         my $n_path_elements_R = scalar @DR;
 | |
|         my $n_path_elem = $n_path_elements_L < $n_path_elements_R ?
 | |
|                           $n_path_elements_L : $n_path_elements_R;
 | |
|         my ($n_L_drop_this_pair, $n_R_drop_this_pair) = (0, 0);
 | |
|         for (my $i = 0; $i < $n_path_elem; $i++) {
 | |
|             last if $DL[ $#DL - $i] ne $DR[ $#DR - $i];
 | |
|             ++$n_L_drop_this_pair;
 | |
|             ++$n_R_drop_this_pair;
 | |
|         }
 | |
|         my $L_common = File::Spec->catdir( @DL[0..($#DL-$n_L_drop_this_pair)] );
 | |
|         my $R_common = File::Spec->catdir( @DR[0..($#DR-$n_R_drop_this_pair)] );
 | |
| #print "L_common=$L_common\n";
 | |
| #print "R_common=$R_common\n";
 | |
|         $L_drop = $L_common if length $L_common < length $L_drop;
 | |
|         $R_drop = $R_common if length $R_common < length $R_drop;
 | |
| 
 | |
|         $L_drop = $L_drop . "/" if $L_drop;
 | |
|         $R_drop = $R_drop . "/" if $R_drop;
 | |
| ########my $ptr_L = length($fL) - 1;
 | |
| ########my $ptr_R = length($fR) - 1;
 | |
| ########my @aL    = split '', $fL;
 | |
| ########my @aR    = split '', $fR;
 | |
| ########while ($ptr_L >= 0 and $ptr_R >= 0) {
 | |
| ########    last if $aL[$ptr_L] ne $aR[$ptr_R];
 | |
| ########    --$ptr_L;
 | |
| ########    --$ptr_R;
 | |
| ########}
 | |
| ########my $leading_dir_L = "";
 | |
| ########   $leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
 | |
| ########my $leading_dir_R = "";
 | |
| ########   $leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
 | |
| ########++$candidate_leading_dir_L{$leading_dir_L};
 | |
| ########++$candidate_leading_dir_R{$leading_dir_R};
 | |
|     }
 | |
| #use Data::Dumper::Simple;
 | |
|     # at this point path separator on Windows is already /
 | |
| 
 | |
|     while ($L_drop =~ m{//}) {
 | |
|         $L_drop =~ s{//}{/}g;
 | |
|     }
 | |
|     while ($R_drop =~ m{//}) {
 | |
|         $R_drop =~ s{//}{/}g;
 | |
|     }
 | |
| 
 | |
| #print "L_drop=$L_drop\n";
 | |
| #print "R_drop=$R_drop\n";
 | |
|     return $L_drop, $R_drop, 1;
 | |
| ####my $best_L = (sort {
 | |
| ####           $candidate_leading_dir_L{$b} <=>
 | |
| ####           $candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
 | |
| ####my $best_R = (sort {
 | |
| ####           $candidate_leading_dir_R{$b} <=>
 | |
| ####           $candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
 | |
| ####return $best_L, $best_R, 1;
 | |
| } # 1}}}
 | |
| sub align_by_pairs {                         # {{{1
 | |
|     my ($rh_file_list_L        , # in
 | |
|         $rh_file_list_R        , # in
 | |
|         $ra_added              , # out
 | |
|         $ra_removed            , # out
 | |
|         $ra_compare_list       , # out
 | |
|         ) = @_;
 | |
|     print "-> align_by_pairs()\n" if $opt_v > 2;
 | |
|     @{$ra_compare_list} = ();
 | |
| 
 | |
|     my @files_L = sort keys %{$rh_file_list_L};
 | |
|     my @files_R = sort keys %{$rh_file_list_R};
 | |
|     return () unless @files_L or  @files_R;  # at least one must have stuff
 | |
|     if ($ON_WINDOWS) {
 | |
|         foreach (@files_L) { $_ =~ s{\\}{/}g; }
 | |
|         foreach (@files_R) { $_ =~ s{\\}{/}g; }
 | |
|         if ($opt_ignore_case_ext) {
 | |
|             foreach (@files_L) { $_ = lc $_; }
 | |
|             foreach (@files_R) { $_ = lc $_; }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if      ( @files_L and !@files_R) {
 | |
|         # left side has stuff, right side is empty; everything deleted
 | |
|         @{$ra_added   }     = ();
 | |
|         @{$ra_removed }     = @files_L;
 | |
|         @{$ra_compare_list} = ();
 | |
|         return;
 | |
|     } elsif (!@files_L and  @files_R) {
 | |
|         # left side is empty, right side has stuff; everything added
 | |
|         @{$ra_added   }     = @files_R;
 | |
|         @{$ra_removed }     = ();
 | |
|         @{$ra_compare_list} = ();
 | |
|         return;
 | |
|     } elsif (scalar(@files_L) == 1 and scalar(@files_R) == 1) {
 | |
|         # Special case of comparing one file against another.  In
 | |
|         # this case force the pair to be aligned with each other,
 | |
|         # otherwise the file naming logic will think one file
 | |
|         # was added and the other deleted.
 | |
|         @{$ra_added   }     = ();
 | |
|         @{$ra_removed }     = ();
 | |
|         @{$ra_compare_list} = ( [$files_L[0], $files_R[0]] );
 | |
|         return;
 | |
|     }
 | |
| #use Data::Dumper::Simple;
 | |
| #print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
 | |
| #die;
 | |
| 
 | |
|     # The harder case:  compare groups of files.  This only works
 | |
|     # if the groups are in different directories so the first step
 | |
|     # is to strip the leading directory names from file lists to
 | |
|     # make it possible to align by file names.
 | |
|     my @files_L_minus_dir = undef;
 | |
|     my @files_R_minus_dir = undef;
 | |
| 
 | |
|     my $deepest_file_L    = find_deepest_file(@files_L);
 | |
|     my $deepest_file_R    = find_deepest_file(@files_R);
 | |
| #print "deepest L = [$deepest_file_L]\n";
 | |
| #print "deepest R = [$deepest_file_R]\n";
 | |
|     my ($leading_dir_L, $leading_dir_R, $success) =
 | |
|                 get_leading_dirs($rh_file_list_L, $rh_file_list_R);
 | |
| #print "leading_dir_L=[$leading_dir_L]\n";
 | |
| #print "leading_dir_R=[$leading_dir_R]\n";
 | |
| #print "success      =[$success]\n";
 | |
|     if ($success) {
 | |
|         @files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
 | |
|         @files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
 | |
|     } else {
 | |
|         # otherwise fall back to old strategy
 | |
|         @files_L_minus_dir = remove_leading_dir(@files_L);
 | |
|         @files_R_minus_dir = remove_leading_dir(@files_R);
 | |
|     }
 | |
| 
 | |
|     # Keys of the stripped_X arrays are canonical file names;
 | |
|     # should overlap mostly.  Keys in stripped_L but not in
 | |
|     # stripped_R are files that have been deleted.  Keys in
 | |
|     # stripped_R but not in stripped_L have been added.
 | |
|     my %stripped_L = ();
 | |
|        @stripped_L{ @files_L_minus_dir } = @files_L;
 | |
|     my %stripped_R = ();
 | |
|        @stripped_R{ @files_R_minus_dir } = @files_R;
 | |
| 
 | |
|     my %common = ();
 | |
|     foreach my $f (keys %stripped_L) {
 | |
|         $common{$f}  = 1 if     defined $stripped_R{$f};
 | |
|     }
 | |
| 
 | |
|     my %deleted = ();
 | |
|     foreach my $f (keys %stripped_L) {
 | |
|         $deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
 | |
|     }
 | |
| 
 | |
|     my %added = ();
 | |
|     foreach my $f (keys %stripped_R) {
 | |
|         $added{$stripped_R{$f}}   = $f unless defined $stripped_L{$f};
 | |
|     }
 | |
| 
 | |
| #use Data::Dumper::Simple;
 | |
| #print Dumper("align_by_pairs", %stripped_L, %stripped_R);
 | |
| #print Dumper("align_by_pairs", %common, %added, %deleted);
 | |
| 
 | |
|     foreach my $f (sort keys %common) {
 | |
|         push @{$ra_compare_list}, [ $stripped_L{$f},
 | |
|                                     $stripped_R{$f} ];
 | |
|     }
 | |
|     @{$ra_added   } = keys %added  ;
 | |
|     @{$ra_removed } = keys %deleted;
 | |
| 
 | |
|     print "<- align_by_pairs()\n" if $opt_v > 2;
 | |
|     return;
 | |
| #print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
 | |
| #die;
 | |
| } # 1}}}
 | |
| sub html_header {                            # {{{1
 | |
|     my ($title , ) = @_;
 | |
| 
 | |
|     print "-> html_header\n" if $opt_v > 2;
 | |
|     return
 | |
| '<html>
 | |
| <head>
 | |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 | |
| <meta name="GENERATOR" content="cloc http://github.com/AlDanial/cloc">
 | |
| ' .
 | |
| "
 | |
| <!-- Created by $script v$VERSION -->
 | |
| <title>$title</title>
 | |
| " .
 | |
| '
 | |
| <style TYPE="text/css">
 | |
| <!--
 | |
|     body {
 | |
|         color: black;
 | |
|         background-color: white;
 | |
|         font-family: monospace
 | |
|     }
 | |
| 
 | |
|     .whitespace {
 | |
|         background-color: gray;
 | |
|     }
 | |
| 
 | |
|     .comment {
 | |
|         color: gray;
 | |
|         font-style: italic;
 | |
|     }
 | |
| 
 | |
|     .clinenum {
 | |
|         color: red;
 | |
|     }
 | |
| 
 | |
|     .linenum {
 | |
|         color: green;
 | |
|     }
 | |
|  -->
 | |
| </style>
 | |
| </head>
 | |
| <body>
 | |
| <pre><tt>
 | |
| ';
 | |
|     print "<- html_header\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub html_end {                               # {{{1
 | |
| return
 | |
| '</tt></pre>
 | |
| </body>
 | |
| </html>
 | |
| ';
 | |
| } # 1}}}
 | |
| sub die_unknown_lang {                       # {{{1
 | |
|     my ($lang, $option_name) = @_;
 | |
|     die "Unknown language '$lang' used with $option_name option.  " .
 | |
|         "The command\n  $script --show-lang\n" .
 | |
|         "will print all recognized languages.  Language names are " .
 | |
|         "case sensitive.\n" ;
 | |
| } # 1}}}
 | |
| sub unicode_file {                           # {{{1
 | |
|     my $file = shift @_;
 | |
| 
 | |
|     print "-> unicode_file($file)\n" if $opt_v > 2;
 | |
|     return 0 if (get_size($file) > 2_000_000);
 | |
|     # don't bother trying to test binary files bigger than 2 MB
 | |
| 
 | |
|     my $IN = open_file('<', $file, 1);
 | |
|     if (!defined $IN) {
 | |
|         warn "Unable to read $file; ignoring.\n";
 | |
|         return 0;
 | |
|     }
 | |
|     my @lines = <$IN>;
 | |
|     $IN->close;
 | |
| 
 | |
|     if (unicode_to_ascii( join('', @lines) )) {
 | |
|         print "<- unicode_file()\n" if $opt_v > 2;
 | |
|         return 1;
 | |
|     } else {
 | |
|         print "<- unicode_file()\n" if $opt_v > 2;
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
| } # 1}}}
 | |
| sub unicode_to_ascii {                       # {{{1
 | |
|     my $string = shift @_;
 | |
| 
 | |
|     # A trivial attempt to convert UTF-16 little or big endian
 | |
|     # files into ASCII.  These files exhibit the following byte
 | |
|     # sequence:
 | |
|     #   byte   1:  255
 | |
|     #   byte   2:  254
 | |
|     #   byte   3:  ord of ASCII character
 | |
|     #   byte   4:    0
 | |
|     #   byte 3+i:  ord of ASCII character
 | |
|     #   byte 4+i:    0
 | |
|     # or
 | |
|     #   byte   1:  255
 | |
|     #   byte   2:  254
 | |
|     #   byte   3:    0
 | |
|     #   byte   4:  ord of ASCII character
 | |
|     #   byte 3+i:    0
 | |
|     #   byte 4+i:  ord of ASCII character
 | |
|     #
 | |
|     print "-> unicode_to_ascii()\n" if $opt_v > 2;
 | |
| 
 | |
|     my $length  = length $string;
 | |
| #print "length=$length\n";
 | |
|     return '' if $length <= 3;
 | |
|     my @unicode = split(//, $string);
 | |
| 
 | |
|     # check the first 100 characters (= 200 bytes) for big or
 | |
|     # little endian UTF-16 encoding
 | |
|     my $max_peek     = $length < 200 ? $length : 200;
 | |
|     my $max_for_pass = $length < 200 ? 0.9*$max_peek/2 : 90;
 | |
|     my @view_1   = ();
 | |
|     for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
 | |
|     my @view_2   = ();
 | |
|     for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
 | |
| 
 | |
|     my $points_1 = 0;
 | |
|     foreach my $C (@view_1) {
 | |
|         ++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
 | |
|                                                           or ord($C) == 10
 | |
|                                                           or ord($C) ==  9;
 | |
|     }
 | |
| 
 | |
|     my $points_2 = 0;
 | |
|     foreach my $C (@view_2) {
 | |
|         ++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
 | |
|                                                           or ord($C) == 10
 | |
|                                                           or ord($C) ==  9;
 | |
|     }
 | |
| #print "points 1: $points_1\n";
 | |
| #print "points 2: $points_2\n";
 | |
| #print "max_peek    : $max_peek\n";
 | |
| #print "max_for_pass: $max_for_pass\n";
 | |
| 
 | |
|     my $offset = undef;
 | |
|     if    ($points_1 > $max_for_pass) { $offset = 2; }
 | |
|     elsif ($points_2 > $max_for_pass) { $offset = 3; }
 | |
|     else                   {
 | |
|         print "<- unicode_to_ascii() a p1=$points_1 p2=$points_2\n" if $opt_v > 2;
 | |
|         return '';
 | |
|     }  # neither big or little endian UTF-16
 | |
| 
 | |
|     my @ascii              = ();
 | |
|     for (my $i = $offset; $i < $length; $i += 2) {
 | |
|         # some compound characters are made of HT (9), LF (10), or CR (13)
 | |
|         # True HT, LF, CR are followed by 00; only add those.
 | |
|         my $L = $unicode[$i];
 | |
|         if (ord($L) == 9 or ord($L) == 10 or ord($L) == 13) {
 | |
|             my $companion;
 | |
|             if ($points_1) {
 | |
|                 last if $i+1 >= $length;
 | |
|                 $companion = $unicode[$i+1];
 | |
|             } else {
 | |
|                 $companion = $unicode[$i-1];
 | |
|             }
 | |
|             if (ord($companion) == 0) {
 | |
|                 push @ascii, $L;
 | |
|             } else {
 | |
|                 push @ascii, " ";  # no clue what this letter is
 | |
|             }
 | |
|         } else {
 | |
|             push @ascii, $L;
 | |
|         }
 | |
|     }
 | |
|     print "<- unicode_to_ascii() b p1=$points_1 p2=$points_2\n" if $opt_v > 2;
 | |
|     return join("", @ascii);
 | |
| } # 1}}}
 | |
| sub uncompress_archive_cmd {                 # {{{1
 | |
|     my ($archive_file, ) = @_;
 | |
| 
 | |
|     # Wrap $archive_file in single or double quotes in the system
 | |
|     # commands below to avoid filename chicanery (including
 | |
|     # spaces in the names).
 | |
| 
 | |
|     print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
 | |
|     my $extract_cmd = "";
 | |
|     my $missing     = "";
 | |
|     if ($opt_extract_with) {
 | |
|         ( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
 | |
|     } elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
 | |
|         $extract_cmd = "cat > -";
 | |
|     } elsif ($archive_file =~ /\.tar$/ and $ON_WINDOWS) {
 | |
|         $extract_cmd = "tar -xf \"$archive_file\"";
 | |
|     } elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or
 | |
|               $archive_file =~ /\.tgz$/       ) and !$ON_WINDOWS)    {
 | |
|         if (external_utility_exists("gzip --version")) {
 | |
|             if (external_utility_exists("tar --version")) {
 | |
|                 $extract_cmd = "gzip -dc '$archive_file' | tar xf -";
 | |
|             } else {
 | |
|                 $missing = "tar";
 | |
|             }
 | |
|         } else {
 | |
|             $missing = "gzip";
 | |
|         }
 | |
|     } elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS)    {
 | |
|         if (external_utility_exists("bzip2 --help")) {
 | |
|             if (external_utility_exists("tar --version")) {
 | |
|                 $extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
 | |
|             } else {
 | |
|                 $missing = "tar";
 | |
|             }
 | |
|         } else {
 | |
|             $missing = "bzip2";
 | |
|         }
 | |
|     } elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS)    {
 | |
|         if (external_utility_exists("unxz --version")) {
 | |
|             if (external_utility_exists("tar --version")) {
 | |
|                 $extract_cmd = "unxz -dc '$archive_file' | tar xf -";
 | |
|             } else {
 | |
|                 $missing = "tar";
 | |
|             }
 | |
|         } else {
 | |
|             $missing = "bzip2";
 | |
|         }
 | |
|     } elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS)    {
 | |
|         $extract_cmd = "tar xf '$archive_file'";
 | |
|     } elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
 | |
|         if (external_utility_exists("cpio --version")) {
 | |
|             if (external_utility_exists("rpm2cpio")) {
 | |
|                 $extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
 | |
|             } else {
 | |
|                 $missing = "rpm2cpio";
 | |
|             }
 | |
|         } else {
 | |
|             $missing = "bzip2";
 | |
|         }
 | |
|     } elsif ($archive_file =~ /\.(whl|zip)$/i and !$ON_WINDOWS)    {
 | |
|         if (external_utility_exists("unzip")) {
 | |
|             $extract_cmd = "unzip -qq -d . '$archive_file'";
 | |
|         } else {
 | |
|             $missing = "unzip";
 | |
|         }
 | |
|     } elsif ($archive_file =~ /\.deb$/i and !$ON_WINDOWS)    {
 | |
|         # only useful if the .deb contains source code--most
 | |
|         # .deb files just have compiled executables
 | |
|         if (external_utility_exists("dpkg-deb")) {
 | |
|             $extract_cmd = "dpkg-deb -x '$archive_file' .";
 | |
|         } else {
 | |
|             $missing = "dpkg-deb";
 | |
|         }
 | |
|     } elsif ($ON_WINDOWS and $archive_file =~ /\.(whl|zip)$/i) {
 | |
|         # use unzip on Windows (comes with git-for-Windows)
 | |
|         if (external_utility_exists("unzip")) {
 | |
|              $extract_cmd = "unzip -qq -d . \"$archive_file\" ";
 | |
|         } else {
 | |
|             $missing = "unzip";
 | |
|         }
 | |
|     }
 | |
|     print "<- uncompress_archive_cmd\n" if $opt_v > 2;
 | |
|     if ($missing) {
 | |
|         die "Unable to expand $archive_file because external\n",
 | |
|             "utility '$missing' is not available.\n",
 | |
|             "Another possibility is to use the --extract-with option.\n";
 | |
|     } else {
 | |
|         return $extract_cmd;
 | |
|     }
 | |
| }
 | |
| # 1}}}
 | |
| sub read_list_file {                         # {{{1
 | |
|     my ($file, ) = @_;
 | |
|     # reads filenames from a STDIN pipe if $file == "-"
 | |
| 
 | |
|     print "-> read_list_file($file)\n" if $opt_v > 2;
 | |
|     my @entry = ();
 | |
| 
 | |
|     if ($file eq "-") {
 | |
|         # read from a STDIN pipe
 | |
|         my $IN;
 | |
|         open($IN, $file);
 | |
|         if (!defined $IN) {
 | |
|             warn "Unable to read $file; ignoring.\n";
 | |
|             return ();
 | |
|         }
 | |
|         while (<$IN>) {
 | |
|             next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
 | |
|             s/\cM$//;  # DOS to Unix
 | |
|             chomp;
 | |
|             push @entry, $_;
 | |
|         }
 | |
|         $IN->close;
 | |
|     } else {
 | |
|         # read from an actual file
 | |
|         foreach my $line (read_file($file)) {
 | |
|             next if $line =~ /^\s*$/ or $line =~ /^\s*#/;
 | |
|             $line =~ s/\cM$//;  # DOS to Unix
 | |
|             chomp $line;
 | |
|             push @entry, $line;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     print "<- read_list_file\n" if $opt_v > 2;
 | |
|     return @entry;
 | |
| }
 | |
| # 1}}}
 | |
| sub external_utility_exists {                # {{{1
 | |
|     my $exe = shift @_;
 | |
| 
 | |
|     my $success      = 0;
 | |
|     if ($ON_WINDOWS) {
 | |
|         $success = 1 unless system $exe . ' > nul';
 | |
|     } else {
 | |
|         $success = 1 unless system $exe . ' >/dev/null 2>&1';
 | |
|         if (!$success) {
 | |
|             $success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return $success;
 | |
| } # 1}}}
 | |
| sub write_xsl_file {                         # {{{1
 | |
|     print "-> write_xsl_file\n" if $opt_v > 2;
 | |
|     my $XSL =             # <style>  </style> {{{2
 | |
| '<?xml version="1.0" encoding="UTF-8"?>
 | |
| <!-- XSL file by Paul Schwann, January 2009.
 | |
|      Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
 | |
|      -->
 | |
| <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 | |
|   <xsl:output method="html"/>
 | |
|   <xsl:template match="/">
 | |
|     <html xmlns="http://www.w3.org/1999/xhtml">
 | |
|       <head>
 | |
|         <title>CLOC Results</title>
 | |
|       </head>
 | |
|       <style type="text/css">
 | |
|         table {
 | |
|           table-layout: auto;
 | |
|           border-collapse: collapse;
 | |
|           empty-cells: show;
 | |
|         }
 | |
|         td, th {
 | |
|           padding: 4px;
 | |
|         }
 | |
|         th {
 | |
|           background-color: #CCCCCC;
 | |
|         }
 | |
|         td {
 | |
|           text-align: center;
 | |
|         }
 | |
|         table, td, tr, th {
 | |
|           border: thin solid #999999;
 | |
|         }
 | |
|       </style>
 | |
|       <body>
 | |
|         <h3><xsl:value-of select="results/header"/></h3>
 | |
| ';
 | |
| # 2}}}
 | |
| 
 | |
|     if ($opt_by_file) {
 | |
|         $XSL .=             # <table> </table>{{{2
 | |
| '        <table>
 | |
|           <thead>
 | |
|             <tr>
 | |
|               <th>File</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|               <th>Language</th>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <th>3<sup>rd</sup> Generation Equivalent</th>
 | |
|               <th>Scale</th>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '           </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="results/files/file">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|               <td><xsl:value-of select="@language"/></td>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <td><xsl:value-of select="@factor"/></td>
 | |
|               <td><xsl:value-of select="@scaled"/></td>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '           </tr>
 | |
|           </xsl:for-each>
 | |
|             <tr>
 | |
|               <th>Total</th>
 | |
|               <th><xsl:value-of select="results/files/total/@blank"/></th>
 | |
|               <th><xsl:value-of select="results/files/total/@comment"/></th>
 | |
|               <th><xsl:value-of select="results/files/total/@code"/></th>
 | |
|               <th><xsl:value-of select="results/files/total/@language"/></th>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <th><xsl:value-of select="results/files/total/@factor"/></th>
 | |
|               <th><xsl:value-of select="results/files/total/@scaled"/></th>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '           </tr>
 | |
|           </tbody>
 | |
|         </table>
 | |
|         <br/>
 | |
| ';
 | |
| # 2}}}
 | |
|     }
 | |
| 
 | |
|     if (!$opt_by_file or $opt_by_file_by_lang) {
 | |
|         $XSL .=             # <table> </table> {{{2
 | |
| '       <table>
 | |
|           <thead>
 | |
|             <tr>
 | |
|               <th>Language</th>
 | |
|               <th>Files</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <th>Scale</th>
 | |
|               <th>3<sup>rd</sup> Generation Equivalent</th>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '           </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="results/languages/language">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@files_count"/></td>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <td><xsl:value-of select="@factor"/></td>
 | |
|               <td><xsl:value-of select="@scaled"/></td>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '          </tr>
 | |
|           </xsl:for-each>
 | |
|             <tr>
 | |
|               <th>Total</th>
 | |
|               <th><xsl:value-of select="results/languages/total/@sum_files"/></th>
 | |
|               <th><xsl:value-of select="results/languages/total/@blank"/></th>
 | |
|               <th><xsl:value-of select="results/languages/total/@comment"/></th>
 | |
|               <th><xsl:value-of select="results/languages/total/@code"/></th>
 | |
| ';
 | |
|         $XSL .=
 | |
| '             <th><xsl:value-of select="results/languages/total/@factor"/></th>
 | |
|               <th><xsl:value-of select="results/languages/total/@scaled"/></th>
 | |
| ' if $opt_3;
 | |
|         $XSL .=
 | |
| '           </tr>
 | |
|           </tbody>
 | |
|         </table>
 | |
| ';
 | |
| # 2}}}
 | |
|     }
 | |
| 
 | |
|     $XSL.= <<'EO_XSL'; # {{{2
 | |
|       </body>
 | |
|     </html>
 | |
|   </xsl:template>
 | |
| </xsl:stylesheet>
 | |
| 
 | |
| EO_XSL
 | |
| # 2}}}
 | |
| 
 | |
|     my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
 | |
| <?xml version="1.0" encoding="UTF-8"?>
 | |
| <!-- XSL file by Blazej Kroll, November 2010 -->
 | |
| <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 | |
|   <xsl:output method="html"/>
 | |
|   <xsl:template match="/">
 | |
|     <html xmlns="http://www.w3.org/1999/xhtml">
 | |
|       <head>
 | |
|         <title>CLOC Results</title>
 | |
|       </head>
 | |
|       <style type="text/css">
 | |
|         table {
 | |
|           table-layout: auto;
 | |
|           border-collapse: collapse;
 | |
|           empty-cells: show;
 | |
|           margin: 1em;
 | |
|         }
 | |
|         td, th {
 | |
|           padding: 4px;
 | |
|         }
 | |
|         th {
 | |
|           background-color: #CCCCCC;
 | |
|         }
 | |
|         td {
 | |
|           text-align: center;
 | |
|         }
 | |
|         table, td, tr, th {
 | |
|           border: thin solid #999999;
 | |
|         }
 | |
|       </style>
 | |
|       <body>
 | |
|         <h3><xsl:value-of select="results/header"/></h3>
 | |
| EO_DIFF_XSL
 | |
| # 2}}}
 | |
| 
 | |
|     if ($opt_by_file) {
 | |
|         $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="4">Same</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>File</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/same/file">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="4">Modified</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>File</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/modified/file">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="4">Added</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>File</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/added/file">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="4">Removed</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>File</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/removed/file">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| EO_DIFF_XSL
 | |
| # 2}}}
 | |
|     }
 | |
| 
 | |
|     if (!$opt_by_file or $opt_by_file_by_lang) {
 | |
|         $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="5">Same</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>Language</th>
 | |
|               <th>Files</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/same/language">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@files_count"/></td>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="5">Modified</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>Language</th>
 | |
|               <th>Files</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/modified/language">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@files_count"/></td>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="5">Added</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>Language</th>
 | |
|               <th>Files</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/added/language">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@files_count"/></td>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| 
 | |
|         <table>
 | |
|           <thead>
 | |
|           <tr><th colspan="5">Removed</th>
 | |
|           </tr>
 | |
|             <tr>
 | |
|               <th>Language</th>
 | |
|               <th>Files</th>
 | |
|               <th>Blank</th>
 | |
|               <th>Comment</th>
 | |
|               <th>Code</th>
 | |
|             </tr>
 | |
|           </thead>
 | |
|           <tbody>
 | |
|           <xsl:for-each select="diff_results/removed/language">
 | |
|             <tr>
 | |
|               <th><xsl:value-of select="@name"/></th>
 | |
|               <td><xsl:value-of select="@files_count"/></td>
 | |
|               <td><xsl:value-of select="@blank"/></td>
 | |
|               <td><xsl:value-of select="@comment"/></td>
 | |
|               <td><xsl:value-of select="@code"/></td>
 | |
|             </tr>
 | |
|           </xsl:for-each>
 | |
|           </tbody>
 | |
|         </table>
 | |
| EO_DIFF_XSL
 | |
| # 2}}}
 | |
| 
 | |
|     }
 | |
| 
 | |
|     $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
 | |
|       </body>
 | |
|     </html>
 | |
|   </xsl:template>
 | |
| </xsl:stylesheet>
 | |
| EO_DIFF_XSL
 | |
| # 2}}}
 | |
|     if ($opt_diff) {
 | |
|         write_file($CLOC_XSL, {}, ( $XSL_DIFF ) );
 | |
|     } else {
 | |
|         write_file($CLOC_XSL, {}, ( $XSL ) );
 | |
|     }
 | |
|     print "<- write_xsl_file\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| sub normalize_file_names {                   # {{{1
 | |
|     print "-> normalize_file_names\n" if $opt_v > 2;
 | |
|     my (@files, ) = @_;
 | |
| 
 | |
|     # Returns a hash of file names reduced to a canonical form
 | |
|     # (fully qualified file names, all path separators changed to /,
 | |
|     # Windows file names lowercased).  Hash values are the original
 | |
|     # file name.
 | |
| 
 | |
|     my %normalized = ();
 | |
|     foreach my $F (@files) {
 | |
|         my $F_norm = $F;
 | |
|         if ($ON_WINDOWS) {
 | |
|             $F_norm = lc $F_norm; # for case insensitive file name comparisons
 | |
|             $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
 | |
|             $F_norm =~ s{^\./}{}g;  # remove leading ./
 | |
|             if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
 | |
|                 # looks like a relative path; prefix with cwd
 | |
|                 $F_norm = lc "$cwd/$F_norm";
 | |
|             }
 | |
|         } else {
 | |
|             $F_norm =~ s{^\./}{}g;  # remove leading ./
 | |
|             if ($F_norm !~ m{^/}) {
 | |
|                 # looks like a relative path; prefix with cwd
 | |
|                 $F_norm = "$cwd/$F_norm";
 | |
|             }
 | |
|         }
 | |
|         # Remove trailing / so it does not interfere with further regex code
 | |
|         # that does not expect it
 | |
|         $F_norm =~ s{/+$}{};
 | |
|         $normalized{ $F_norm } = $F;
 | |
|     }
 | |
|     print "<- normalize_file_names\n" if $opt_v > 2;
 | |
|     return %normalized;
 | |
| } # 1}}}
 | |
| sub combine_diffs {                          # {{{1
 | |
|     # subroutine by Andy (awalshe@sf.net)
 | |
|     # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
 | |
|     my ($ra_files) = @_;
 | |
|     print "-> combine_diffs\n" if $opt_v > 2;
 | |
| 
 | |
|     my $res   = "$URL v $VERSION\n";
 | |
|     my $dl    = '-';
 | |
|     my $width = 79;
 | |
|     # columns are in this order
 | |
|     my @cols  = ('files', 'blank', 'comment', 'code');
 | |
|     my %HoH   = ();
 | |
| 
 | |
|     foreach my $file (@{$ra_files}) {
 | |
|         my $IN = open_file('<', $file, 1);
 | |
|         if (!defined $IN) {
 | |
|             warn "Unable to read $file; ignoring.\n";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $sec;
 | |
|         while (<$IN>) {
 | |
|             chomp;
 | |
|             s/\cM$//;
 | |
|             next if /^(http|Language|-----)/;
 | |
|             if (/^[A-Za-z0-9]+/) {        # section title
 | |
|                 $sec = $_;
 | |
|                 chomp($sec);
 | |
|                 $HoH{$sec} = () if ! exists $HoH{$sec};
 | |
|                 next;
 | |
|             }
 | |
| 
 | |
|             if (/^\s(same|modified|added|removed)/) {  # calculated totals row
 | |
|                 my @ar = grep { $_ ne '' } split(/ /, $_);
 | |
|                 chomp(@ar);
 | |
|                 my $ttl = shift @ar;
 | |
|                 my $i = 0;
 | |
|                 foreach(@ar) {
 | |
|                     my $t = "${ttl}${dl}${cols[$i]}";
 | |
|                     $HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
 | |
|                     $HoH{$sec}{$t} += $_;
 | |
|                     $i++;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         $IN->close;
 | |
|     }
 | |
| 
 | |
|     # rows are in this order
 | |
|     my @rows = ('same', 'modified', 'added', 'removed');
 | |
| 
 | |
|     $res .= sprintf("%s\n", "-" x $width);
 | |
|     $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language',
 | |
|                     $cols[0], $cols[1], $cols[2], $cols[3]);
 | |
|     $res .= sprintf("%s\n", "-" x $width);
 | |
| 
 | |
|     # no inputs? %HoH will be empty
 | |
|     return $res unless %HoH;
 | |
| 
 | |
|     for my $sec ( keys %HoH ) {
 | |
|         next if $sec =~ /SUM:/;
 | |
|         next unless defined $HoH{$sec};  # eg, the header line
 | |
|         $res .= "$sec\n";
 | |
|         foreach (@rows) {
 | |
|             $res .= sprintf(" %-18s %14s %14s %14s %14s\n",
 | |
|                             $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
 | |
|                                 $HoH{$sec}{"${_}${dl}${cols[1]}"},
 | |
|                                 $HoH{$sec}{"${_}${dl}${cols[2]}"},
 | |
|                                 $HoH{$sec}{"${_}${dl}${cols[3]}"});
 | |
|         }
 | |
|     }
 | |
|     $res .= sprintf("%s\n", "-" x $width);
 | |
|     my $sec = 'SUM:';
 | |
|     $res .= "$sec\n";
 | |
|     foreach (@rows) {
 | |
|         $res .= sprintf(" %-18s %14s %14s %14s %14s\n",
 | |
|                         $_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
 | |
|                             $HoH{$sec}{"${_}${dl}${cols[1]}"},
 | |
|                             $HoH{$sec}{"${_}${dl}${cols[2]}"},
 | |
|                             $HoH{$sec}{"${_}${dl}${cols[3]}"});
 | |
|     }
 | |
|     $res .= sprintf("%s\n", "-" x $width);
 | |
| 
 | |
|     print "<- combine_diffs\n" if $opt_v > 2;
 | |
|     return $res;
 | |
| } # 1}}}
 | |
| sub combine_csv_diffs {                      # {{{1
 | |
|     my ($delimiter, $ra_files) = @_;
 | |
|     print "-> combine_csv_diffs\n" if $opt_v > 2;
 | |
| 
 | |
|     my %sum = ();  # sum{ language } = array of 17 values
 | |
|     foreach my $file (@{$ra_files}) {
 | |
|         my $IN = open_file('<', $file, 1);
 | |
|         if (!defined $IN) {
 | |
|             warn "Unable to read $file; ignoring.\n";
 | |
|             next;
 | |
|         }
 | |
| 
 | |
|         my $sec;
 | |
|         while (<$IN>) {
 | |
|             next if /^Language${delimiter}\s==\sfiles${delimiter}/;
 | |
|             chomp;
 | |
|             my @words = split(/$delimiter/);
 | |
|             my $n_col = scalar(@words);
 | |
|             if ($n_col != 18) {
 | |
|                 warn "combine_csv_diffs(): Parse failure line $. of $file\n";
 | |
|                 warn "Expected 18 columns, got $n_col\n";
 | |
|                 die;
 | |
|             }
 | |
|             my $Lang = $words[0];
 | |
|             my @count = map { int($_) } @words[1..16];
 | |
|             if (defined $sum{$Lang}) {
 | |
|                 for (my $i = 0; $i < 16; $i++) {
 | |
|                     $sum{$Lang}[$i] += $count[$i];
 | |
|                 }
 | |
|             } else {
 | |
|                 @{$sum{$Lang}} = @count;
 | |
|             }
 | |
|         }
 | |
|         $IN->close;
 | |
|     }
 | |
| 
 | |
|     my @header = ("Language", "== files", "!= files", "+ files", "- files",
 | |
|                   "== blank", "!= blank", "+ blank", "- blank", "== comment",
 | |
|                   "!= comment", "+ comment", "- comment", "== code",
 | |
|                   "!= code", "+ code", "- code", "$URL v $VERSION" );
 | |
| 
 | |
|     my $res = join("$delimiter ", @header) . "$delimiter\n";
 | |
|     foreach my $Lang (sort keys %sum) {
 | |
|         $res .= $Lang . "$delimiter ";
 | |
|         for (my $i = 0; $i < 16; $i++) {
 | |
|             $res .= $sum{$Lang}[$i] . "$delimiter ";
 | |
|         }
 | |
|         $res .= "\n";
 | |
|     }
 | |
| 
 | |
|     print "<- combine_csv_diffs\n" if $opt_v > 2;
 | |
|     return $res;
 | |
| } # 1}}}
 | |
| sub get_time {                               # {{{1
 | |
|     if ($HAVE_Time_HiRes) {
 | |
|         return Time::HiRes::time();
 | |
|     } else {
 | |
|         return time();
 | |
|     }
 | |
| } # 1}}}
 | |
| sub really_is_D {                            # {{{1
 | |
|     # Ref bug 131, files ending with .d could be init.d scripts
 | |
|     # instead of D language source files.
 | |
|     my ($file        , # in
 | |
|         $rh_Err      , # in   hash of error codes
 | |
|         $raa_errors  , # out
 | |
|        ) = @_;
 | |
|     print "-> really_is_D($file)\n" if $opt_v > 2;
 | |
|     my ($possible_script, $L) = peek_at_first_line($file, $rh_Err, $raa_errors);
 | |
| 
 | |
|     print "<- really_is_D($file)\n" if $opt_v > 2;
 | |
|     return $possible_script;    # null string if D, otherwise a language
 | |
| } # 1}}}
 | |
| sub no_autogen_files {                       # {{{1
 | |
|     # ref https://github.com/AlDanial/cloc/issues/151
 | |
|     my ($print,) = @_;
 | |
|     print "-> no_autogen($print)\n" if $opt_v > 2;
 | |
| 
 | |
|     # These sometimes created manually?
 | |
|     #               acinclude.m4
 | |
|     #               configure.ac
 | |
|     #               Makefile.am
 | |
| 
 | |
|     my @files = qw (
 | |
|                     aclocal.m4
 | |
|                     announce-gen
 | |
|                     autogen.sh
 | |
|                     bootstrap
 | |
|                     compile
 | |
|                     config.guess
 | |
|                     config.h.in
 | |
|                     config.rpath
 | |
|                     config.status
 | |
|                     config.sub
 | |
|                     configure
 | |
|                     configure.in
 | |
|                     depcomp
 | |
|                     gendocs.sh
 | |
|                     gitlog-to-changelog
 | |
|                     git-version-gen
 | |
|                     gnupload
 | |
|                     gnu-web-doc-update
 | |
|                     install-sh
 | |
|                     libtool
 | |
|                     libtool.m4
 | |
|                     link-warning.h
 | |
|                     ltmain.sh
 | |
|                     lt~obsolete.m4
 | |
|                     ltoptions.m4
 | |
|                     ltsugar.m4
 | |
|                     ltversion.in
 | |
|                     ltversion.m4
 | |
|                     Makefile.in
 | |
|                     mdate-sh
 | |
|                     missing
 | |
|                     mkinstalldirs
 | |
|                     test-driver
 | |
|                     texinfo.tex
 | |
|                     update-copyright
 | |
|                     useless-if-before-free
 | |
|                     vc-list-files
 | |
|                     ylwrap
 | |
|                    );
 | |
| 
 | |
|     if ($print) {
 | |
|         printf "cloc will ignore these %d files with --no-autogen:\n", scalar @files;
 | |
|         foreach my $F (@files) {
 | |
|             print "    $F\n";
 | |
|         }
 | |
|         print "Additionally, Go files with '// Code generated by .* DO NOT EDIT.'\n";
 | |
|         print "on the first line are ignored.\n";
 | |
|     }
 | |
|     print "<- no_autogen()\n" if $opt_v > 2;
 | |
|     return @files;
 | |
| } # 1}}}
 | |
| sub load_from_config_file {                  # {{{1
 | |
|     # Supports all options except --config itself which would
 | |
|     # be pointless.
 | |
|     my ($config_file,
 | |
|                                                  $rs_by_file             ,
 | |
|                                                  $rs_by_file_by_lang     ,
 | |
|                                                  $rs_categorized         ,
 | |
|                                                  $rs_counted             ,
 | |
|                                                  $rs_include_ext         ,
 | |
|                                                  $rs_include_lang        ,
 | |
|                                                  $rs_include_content     ,
 | |
|                                                  $rs_exclude_content     ,
 | |
|                                                  $rs_exclude_lang        ,
 | |
|                                                  $rs_exclude_dir         ,
 | |
|                                                  $rs_exclude_list_file   ,
 | |
|                                                  $rs_explain             ,
 | |
|                                                  $rs_extract_with        ,
 | |
|                                                  $rs_found               ,
 | |
|                                                  $rs_count_diff          ,
 | |
|                                                  $rs_diff_list_files     ,
 | |
|                                                  $rs_diff                ,
 | |
|                                                  $rs_diff_alignment      ,
 | |
|                                                  $rs_diff_timeout        ,
 | |
|                                                  $rs_timeout             ,
 | |
|                                                  $rs_html                ,
 | |
|                                                  $rs_ignored             ,
 | |
|                                                  $rs_quiet               ,
 | |
|                                                  $rs_force_lang_def      ,
 | |
|                                                  $rs_read_lang_def       ,
 | |
|                                                  $rs_show_ext            ,
 | |
|                                                  $rs_show_lang           ,
 | |
|                                                  $rs_progress_rate       ,
 | |
|                                                  $rs_print_filter_stages ,
 | |
|                                                  $rs_report_file         ,
 | |
|                                                  $ra_script_lang         ,
 | |
|                                                  $rs_sdir                ,
 | |
|                                                  $rs_skip_uniqueness     ,
 | |
|                                                  $rs_strip_code          ,
 | |
|                                                  $rs_strip_comments      ,
 | |
|                                                  $rs_original_dir        ,
 | |
|                                                  $rs_sum_reports         ,
 | |
|                                                  $rs_hide_rate           ,
 | |
|                                                  $rs_processes           ,
 | |
|                                                  $rs_unicode             ,
 | |
|                                                  $rs_3                   ,
 | |
|                                                  $rs_v                   ,
 | |
|                                                  $rs_vcs                 ,
 | |
|                                                  $rs_version             ,
 | |
|                                                  $rs_write_lang_def      ,
 | |
|                                                  $rs_write_lang_def_incl_dup,
 | |
|                                                  $rs_xml                 ,
 | |
|                                                  $rs_xsl                 ,
 | |
|                                                  $ra_force_lang          ,
 | |
|                                                  $rs_lang_no_ext         ,
 | |
|                                                  $rs_yaml                ,
 | |
|                                                  $rs_csv                 ,
 | |
|                                                  $rs_csv_delimiter       ,
 | |
|                                                  $rs_json                ,
 | |
|                                                  $rs_md                  ,
 | |
|                                                  $rs_fullpath            ,
 | |
|                                                  $rs_match_f             ,
 | |
|                                                  $ra_not_match_f         ,
 | |
|                                                  $rs_match_d             ,
 | |
|                                                  $ra_not_match_d         ,
 | |
|                                                  $rs_list_file           ,
 | |
|                                                  $rs_help                ,
 | |
|                                                  $rs_skip_win_hidden     ,
 | |
|                                                  $rs_read_binary_files   ,
 | |
|                                                  $rs_sql                 ,
 | |
|                                                  $rs_sql_project         ,
 | |
|                                                  $rs_sql_append          ,
 | |
|                                                  $rs_sql_style           ,
 | |
|                                                  $rs_inline              ,
 | |
|                                                  $rs_exclude_ext         ,
 | |
|                                                  $rs_ignore_whitespace   ,
 | |
|                                                  $rs_ignore_case         ,
 | |
|                                                  $rs_ignore_case_ext     ,
 | |
|                                                  $rs_follow_links        ,
 | |
|                                                  $rs_autoconf            ,
 | |
|                                                  $rs_sum_one             ,
 | |
|                                                  $rs_by_percent          ,
 | |
|                                                  $rs_stdin_name          ,
 | |
|                                                  $rs_force_on_windows    ,
 | |
|                                                  $rs_force_on_unix       ,
 | |
|                                                  $rs_show_os             ,
 | |
|                                                  $rs_skip_archive        ,
 | |
|                                                  $rs_max_file_size       ,
 | |
|                                                  $rs_use_sloccount       ,
 | |
|                                                  $rs_no_autogen          ,
 | |
|                                                  $rs_force_git           ,
 | |
|                                                  $rs_strip_str_comments  ,
 | |
|                                                  $rs_file_encoding       ,
 | |
|                                                  $rs_docstring_as_code   ,
 | |
|                                                  $rs_stat                ,
 | |
|         ) = @_;
 | |
|         # look for runtime configuration file in
 | |
|         #    $ENV{'HOME'}/.config/cloc/options.txt         -> POSIX
 | |
|         #    $ENV{'APPDATA'} . 'cloc'
 | |
| 
 | |
|     print "-> load_from_config_file($config_file)\n" if $opt_v and $opt_v > 2;
 | |
|     if (!is_file($config_file)) {
 | |
|         print "<- load_from_config_file() (no such file: $config_file)\n" if $opt_v and $opt_v > 2;
 | |
|         return;
 | |
|     } elsif (!can_read($config_file)) {
 | |
|         print "<- load_from_config_file() (unable to read $config_file)\n" if $opt_v and $opt_v > 2;
 | |
|         return;
 | |
|     }
 | |
|     print "Reading options from $config_file.\n" if defined $opt_v;
 | |
| 
 | |
|     my $has_force_lang = @{$ra_force_lang};
 | |
|     my $has_script_lang = @{$ra_script_lang};
 | |
|     my @lines = read_file($config_file);
 | |
|     foreach (@lines) {
 | |
|         next if /^\s*$/ or /^\s*#/;
 | |
|         s/\s*--//;
 | |
|         s/^\s+//;
 | |
|         if      (!defined ${$rs_by_file}             and /^(by_file|by-file)/)                                { ${$rs_by_file}            = 1;
 | |
|         } elsif (!defined ${$rs_by_file_by_lang}     and /^(by_file_by_lang|by-file-by-lang)/)                { ${$rs_by_file_by_lang}    = 1;
 | |
|         } elsif (!defined ${$rs_categorized}         and /^categorized(=|\s+)(.*?)$/)                         { ${$rs_categorized}        = $2;
 | |
|         } elsif (!defined ${$rs_counted}             and /^counted(=|\s+)(.*?)$/)                             { ${$rs_counted}            = $2;
 | |
|         } elsif (!defined ${$rs_include_ext}         and /^(?:include_ext|include-ext)(=|\s+)(.*?)$/)         { ${$rs_include_ext}        = $2;
 | |
|         } elsif (!defined ${$rs_include_lang}        and /^(?:include_lang|include-lang)(=|\s+)(.*?)$/)       { ${$rs_include_lang}       = $2;
 | |
|         } elsif (!defined ${$rs_include_content}     and /^(?:include_content|include-content)(=|\s+)(.*?)$/) { ${$rs_include_content}    = $2;
 | |
|         } elsif (!defined ${$rs_exclude_content}     and /^(?:exclude_content|exclude-content)(=|\s+)(.*?)$/) { ${$rs_exclude_content}    = $2;
 | |
|         } elsif (!defined ${$rs_exclude_lang}        and /^(?:exclude_lang|exclude-lang)(=|\s+)(.*?)$/)       { ${$rs_exclude_lang}       = $2;
 | |
|         } elsif (!defined ${$rs_exclude_dir}         and /^(?:exclude_dir|exclude-dir)(=|\s+)(.*?)$/)         { ${$rs_exclude_dir}        = $2;
 | |
|         } elsif (!defined ${$rs_explain}             and /^explain(=|\s+)(.*?)$/)                             { ${$rs_explain}            = $2;
 | |
|         } elsif (!defined ${$rs_extract_with}        and /^(?:extract_with|extract-with)(=|\s+)(.*?)$/)       { ${$rs_extract_with}       = $2;
 | |
|         } elsif (!defined ${$rs_found}               and /^found(=|\s+)(.*?)$/)                               { ${$rs_found}              = $2;
 | |
|         } elsif (!defined ${$rs_count_diff}          and /^(count_and_diff|count-and-diff)/)                  { ${$rs_count_diff}         = 1;
 | |
|         } elsif (!defined ${$rs_diff_list_files}     and /^(diff_list_files|diff-list-files)/)                { ${$rs_diff_list_files}    = 1;
 | |
|         } elsif (!defined ${$rs_diff}                and /^diff/)                                             { ${$rs_diff}               = 1;
 | |
|         } elsif (!defined ${$rs_diff_alignment}      and /^(?:diff-alignment|diff_alignment)(=|\s+)(.*?)$/)   { ${$rs_diff_alignment}     = $2;
 | |
|         } elsif (!defined ${$rs_diff_timeout}        and /^(?:diff-timeout|diff_timeout)(=|\s+)i/)            { ${$rs_diff_timeout}       = $1;
 | |
|         } elsif (!defined ${$rs_timeout}             and /^timeout(=|\s+)i/)                                  { ${$rs_timeout}            = $1;
 | |
|         } elsif (!defined ${$rs_html}                and /^html/)                                             { ${$rs_html}               = 1;
 | |
|         } elsif (!defined ${$rs_ignored}             and /^ignored(=|\s+)(.*?)$/)                             { ${$rs_ignored}            = $2;
 | |
|         } elsif (!defined ${$rs_quiet}               and /^quiet/)                                            { ${$rs_quiet}              = 1;
 | |
|         } elsif (!defined ${$rs_force_lang_def}      and /^(?:force_lang_def|force-lang-def)(=|\s+)(.*?)$/)   { ${$rs_force_lang_def}     = $2;
 | |
|         } elsif (!defined ${$rs_read_lang_def}       and /^(?:read_lang_def|read-lang-def)(=|\s+)(.*?)$/)     { ${$rs_read_lang_def}      = $2;
 | |
|         } elsif (!defined ${$rs_progress_rate}       and /^(?:progress_rate|progress-rate)(=|\s+)(\d+)/)      { ${$rs_progress_rate}      = $2;
 | |
|         } elsif (!defined ${$rs_print_filter_stages} and /^(print_filter_stages|print-filter-stages)/)        { ${$rs_print_filter_stages}= 1;
 | |
|         } elsif (!defined ${$rs_report_file}         and /^(?:report_file|report-file)(=|\s+)(.*?)$/)         { ${$rs_report_file}        = $2;
 | |
|         } elsif (!defined ${$rs_report_file}         and /^out(=|\s+)(.*?)$/)                                 { ${$rs_report_file}        = $2;
 | |
|         } elsif (!defined ${$rs_sdir}                and /^sdir(=|\s+)(.*?)$/)                                { ${$rs_sdir}               = $2;
 | |
|         } elsif (!defined ${$rs_skip_uniqueness}     and /^(skip_uniqueness|skip-uniqueness)/)                { ${$rs_skip_uniqueness}    = 1;
 | |
|         } elsif (!defined ${$rs_strip_code}          and /^(?:strip_code|strip-code)(=|\s+)(.*?)$/)           { ${$rs_strip_code}         = $2;
 | |
|         } elsif (!defined ${$rs_strip_comments}      and /^(?:strip_comments|strip-comments)(=|\s+)(.*?)$/)   { ${$rs_strip_comments}     = $2;
 | |
|         } elsif (!defined ${$rs_original_dir}        and /^(original_dir|original-dir)/)                      { ${$rs_original_dir}       = 1;
 | |
|         } elsif (!defined ${$rs_sum_reports}         and /^(sum_reports|sum-reports)/)                        { ${$rs_sum_reports}        = 1;
 | |
|         } elsif (!defined ${$rs_hide_rate}           and /^(hid_rate|hide-rate)/)                             { ${$rs_hide_rate}          = 1;
 | |
|         } elsif (!defined ${$rs_processes}           and /^processes(=|\s+)(\d+)/)                            { ${$rs_processes}          = $2;
 | |
|         } elsif (!defined ${$rs_unicode}             and /^unicode/)                                          { ${$rs_unicode}            = 1;
 | |
|         } elsif (!defined ${$rs_3}                   and /^3/)                                                { ${$rs_3}                  = 1;
 | |
|         } elsif (!defined ${$rs_vcs}                 and /^vcs(=|\s+)(\S+)/)                                  { ${$rs_vcs}                = $2;
 | |
|         } elsif (!defined ${$rs_version}             and /^version/)                                          { ${$rs_version}            = 1;
 | |
|         } elsif (!defined ${$rs_write_lang_def}      and /^(?:write_lang_def|write-lang-def)(=|\s+)(.*?)$/)   { ${$rs_write_lang_def}     = $2;
 | |
|         } elsif (!defined ${$rs_write_lang_def_incl_dup} and /^(?:write_lang_def_incl_dup|write-lang-def-incl-dup)(=|\s+)(.*?)$/) { ${$rs_write_lang_def_incl_dup} = $2;
 | |
|         } elsif (!defined ${$rs_xml}                 and /^xml/)                                              { ${$rs_xml}                = 1;
 | |
|         } elsif (!defined ${$rs_xsl}                 and /^xsl(=|\s+)(.*?)$/)                                 { ${$rs_xsl}                = $2;
 | |
|         } elsif (!defined ${$rs_lang_no_ext}         and /^(?:lang_no_ext|lang-no-ext)(=|\s+)(.*?)$/)         { ${$rs_lang_no_ext}        = $2;
 | |
|         } elsif (!defined ${$rs_yaml}                and /^yaml/)                                             { ${$rs_yaml}               = 1;
 | |
|         } elsif (!defined ${$rs_csv}                 and /^csv/)                                              { ${$rs_csv}                = 1;
 | |
|         } elsif (!defined ${$rs_csv_delimiter}       and /^(?:csv_delimiter|csv-delimiter)(=|\s+)(.*?)$/)     { ${$rs_csv_delimiter}      = $2;
 | |
|         } elsif (!defined ${$rs_json}                and /^json/)                                             { ${$rs_json}               = 1;
 | |
|         } elsif (!defined ${$rs_md}                  and /^md/)                                               { ${$rs_md}                 = 1;
 | |
|         } elsif (!defined ${$rs_fullpath}            and /^fullpath/)                                         { ${$rs_fullpath}           = 1;
 | |
|         } elsif (!defined ${$rs_match_f}             and /^(?:match_f|match-f)(=|\s+)(.*?)$/)                 { ${$rs_match_f}            = $2;
 | |
|         } elsif (!        @{$ra_not_match_f}         and /^(?:not_match_f|not-match-f)(=|\s+)(.*?)$/)         { push @{$ra_not_match_f}   , $2;
 | |
|         } elsif (!defined ${$rs_match_d}             and /^(?:match_d|match-d)(=|\s+)(.*?)$/)                 { ${$rs_match_d}            = $2;
 | |
|         } elsif (!        @{$ra_not_match_d}         and /^(?:not_match_d|not-match-d)(=|\s+)(.*?)$/)         { push @{$ra_not_match_d}   , $2;
 | |
|         } elsif (!defined ${$rs_list_file}           and /^(?:list_file|list-file)(=|\s+)(.*?)$/)             { ${$rs_list_file}          = $2;
 | |
|         } elsif (!defined ${$rs_help}                and /^help/)                                             { ${$rs_help}               = 1;
 | |
|         } elsif (!defined ${$rs_skip_win_hidden}     and /^(skip_win_hidden|skip-win-hidden)/)                { ${$rs_skip_win_hidden}    = 1;
 | |
|         } elsif (!defined ${$rs_read_binary_files}   and /^(read_binary_files|read-binary-files)/)            { ${$rs_read_binary_files}  = 1;
 | |
|         } elsif (!defined ${$rs_sql}                 and /^sql(=|\s+)(.*?)$/)                                 { ${$rs_sql}                = $2;
 | |
|         } elsif (!defined ${$rs_sql_project}         and /^(?:sql_project|sql-project)(=|\s+)(.*?)$/)         { ${$rs_sql_project}        = $2;
 | |
|         } elsif (!defined ${$rs_sql_append}          and /^(sql_append|sql-append)/)                          { ${$rs_sql_append}         = 1;
 | |
|         } elsif (!defined ${$rs_sql_style}           and /^(?:sql_style|sql-style)(=|\s+)(.*?)$/)             { ${$rs_sql_style}          = $2;
 | |
|         } elsif (!defined ${$rs_inline}              and /^inline/)                                           { ${$rs_inline}             = 1;
 | |
|         } elsif (!defined ${$rs_exclude_ext}         and /^(?:exclude_ext|exclude-ext)(=|\s+)(.*?)$/)         { ${$rs_exclude_ext}        = $2;
 | |
|         } elsif (!defined ${$rs_ignore_whitespace}   and /^(ignore_whitespace|ignore-whitespace)/)            { ${$rs_ignore_whitespace}  = 1;
 | |
|         } elsif (!defined ${$rs_ignore_case_ext}     and /^(ignore_case_ext|ignore-case-ext)/)                { ${$rs_ignore_case_ext}    = 1;
 | |
|         } elsif (!defined ${$rs_ignore_case}         and /^(ignore_case|ignore-case)/)                        { ${$rs_ignore_case}        = 1;
 | |
|         } elsif (!defined ${$rs_follow_links}        and /^(follow_links|follow-links)/)                      { ${$rs_follow_links}       = 1;
 | |
|         } elsif (!defined ${$rs_autoconf}            and /^autoconf/)                                         { ${$rs_autoconf}           = 1;
 | |
|         } elsif (!defined ${$rs_sum_one}             and /^(sum_one|sum-one)/)                                { ${$rs_sum_one}            = 1;
 | |
|         } elsif (!defined ${$rs_by_percent}          and /^(?:by_percent|by-percent)(=|\s+)(.*?)$/)           { ${$rs_by_percent}         = $2;
 | |
|         } elsif (!defined ${$rs_stdin_name}          and /^(?:stdin_name|stdin-name)(=|\s+)(.*?)$/)           { ${$rs_stdin_name}         = $2;
 | |
|         } elsif (!defined ${$rs_force_on_windows}    and /^windows/)                                          { ${$rs_force_on_windows}   = 1;
 | |
|         } elsif (!defined ${$rs_force_on_unix}       and /^unix/)                                             { ${$rs_force_on_unix}      = 1;
 | |
|         } elsif (!defined ${$rs_show_os}             and /^(show_os|show-os)/)                                { ${$rs_show_os}            = 1;
 | |
|         } elsif (!defined ${$rs_skip_archive}        and /^(?:skip_archive|skip-archive)(=|\s+)(.*?)$/)       { ${$rs_skip_archive}       = $2;
 | |
|         } elsif (!defined ${$rs_max_file_size}       and /^(?:max_file_size|max-file-size)(=|\s+)(\d+)/)      { ${$rs_max_file_size}      = $2;
 | |
|         } elsif (!defined ${$rs_use_sloccount}       and /^(use_sloccount|use-sloccount)/)                    { ${$rs_use_sloccount}      = 1;
 | |
|         } elsif (!defined ${$rs_no_autogen}          and /^(no_autogen|no-autogen)/)                          { ${$rs_no_autogen}         = 1;
 | |
|         } elsif (!defined ${$rs_force_git}           and /^git/)                                              { ${$rs_force_git}          = 1;
 | |
|         } elsif (!defined ${$rs_exclude_list_file}   and /^(?:exclude_list_file|exclude-list-file)(=|\s+)(.*?)$/)
 | |
|                                                                    { ${$rs_exclude_list_file}  = $2;
 | |
|         } elsif (!defined ${$rs_v} and /^(verbose|v)((=|\s+)(\d+))?/) {
 | |
|             if (!defined $4) { ${$rs_v} =  0; }
 | |
|             else             { ${$rs_v} = $4; }
 | |
|         } elsif (!$has_script_lang and /^(?:script_lang|script-lang)(=|\s+)(.*?)$/)         {
 | |
|                                                             push @{$ra_script_lang}          , $2;
 | |
|         } elsif (!$has_force_lang and /^(?:force_lang|force-lang)(=|\s+)(.*?)$/)           {
 | |
|                                                             push @{$ra_force_lang}           , $2;
 | |
|         } elsif (!defined ${$rs_show_ext}          and /^(show_ext|show-ext)((=|\s+)(.*))?$/)  {
 | |
|             if (!defined $4) { ${$rs_show_ext} =  0; }
 | |
|             else             { ${$rs_show_ext} = $4; }
 | |
|         } elsif (!defined ${$rs_show_lang}         and /^(show_lang|show-lang)((=|\s+)(.*))?s/){
 | |
|             if (!defined $4) { ${$rs_show_lang} =  0; }
 | |
|             else             { ${$rs_show_lang} = $4; }
 | |
|         } elsif (!defined ${$rs_strip_str_comments}  and /^(strip_str_comments|strip-str-comments)/)     { ${$rs_strip_str_comments} = 1;
 | |
|         } elsif (!defined ${$rs_file_encoding}       and /^(?:file_encoding|file-encoding)(=|\s+)(\S+)/) { ${$rs_file_encoding}      = $2;
 | |
|         } elsif (!defined ${$rs_docstring_as_code}   and /^(docstring_as_code|docstring-as-code)/)       { ${$rs_docstring_as_code}  = 1;
 | |
|         } elsif (!defined ${$rs_stat}                and /stat/)                                         { ${$rs_stat}               = 1;
 | |
|         }
 | |
| 
 | |
|     }
 | |
| } # 1}}}
 | |
| sub trick_pp_packer_encode {                 # {{{1
 | |
|     use Encode;
 | |
|     # PAR::Packer gives 'Unknown PerlIO layer "encoding"' unless it is
 | |
|     # forced into using this module.
 | |
|     my ($OUT, $JunkFile) = tempfile(UNLINK => 1);  # delete on exit
 | |
|     open($OUT, "> :encoding(utf8)", $JunkFile);
 | |
|     close($OUT);
 | |
| }
 | |
| # 1}}}
 | |
| sub really_is_smarty {                       # {{{1
 | |
|     # Given filename, returns TRUE if its contents look like Smarty template
 | |
|     my ($filename, ) = @_;
 | |
| 
 | |
|     print "-> really_is_smarty($filename)\n" if $opt_v > 2;
 | |
| 
 | |
|     my @lines = read_file($filename);
 | |
| 
 | |
|     my $points = 0;
 | |
|     foreach my $L (@lines) {
 | |
|         if (($L =~ /\{(if|include)\s/) or
 | |
|             ($L =~ /\{\/if\}/)         or
 | |
|             ($L =~ /(\{\*|\*\})/)      or
 | |
|             ($L =~ /\{\$\w/)) {
 | |
|             ++$points;
 | |
|         }
 | |
|         last if $points >= 2;
 | |
|     }
 | |
|     print "<- really_is_smarty(points=$points)\n" if $opt_v > 2;
 | |
|     return $points >= 2;
 | |
| } # 1}}}
 | |
| sub check_alternate_config_files {           # {{{1
 | |
|     my ($list_file, $exclude_list_file, $read_lang_def,
 | |
|         $force_lang_def, $diff_list_file, ) = @_;
 | |
|     my $found_it = "";
 | |
|     foreach my $file ($list_file,
 | |
|                       $exclude_list_file,
 | |
|                       $read_lang_def,
 | |
|                       $force_lang_def,
 | |
|                       $diff_list_file ) {
 | |
|         next unless defined $file;
 | |
|         my $dir = dirname $file;
 | |
|         next unless can_read($dir) and is_dir($dir);
 | |
|         my $bn = basename $config_file;
 | |
|         if (can_read("$dir/$bn")) {
 | |
|             $found_it = "$dir/$bn";
 | |
|             print "Using configuration file $found_it\n" if $opt_v;
 | |
|             last;
 | |
|         }
 | |
|     }
 | |
|     return $found_it;
 | |
| }
 | |
| # 1}}}
 | |
| sub write_null_results {                     # {{{
 | |
|     my ($json, $xml, $report_file,) = @_;
 | |
|     print "-> write_null_results\n" if $opt_v > 2;
 | |
|     if ((defined $json) or (defined $xml)) {
 | |
|         my $line = "";
 | |
|         if (defined $json) {
 | |
|             $line = "{}";
 | |
|         } else {
 | |
|             $line = '<?xml version="1.0" encoding="UTF-8"?><results/>';
 | |
|         }
 | |
|         if (defined $report_file) {
 | |
|             open OUT, ">$report_file" or die "Cannot write to $report_file $!\n";
 | |
|             print OUT "$line\n";
 | |
|             close OUT;
 | |
|         } else {
 | |
|             print "$line\n";
 | |
|         }
 | |
|     }
 | |
|     print "<- write_null_results\n" if $opt_v > 2;
 | |
| } # }}}
 | |
| sub glob2regex {                             # {{{
 | |
|     # convert simple xpath-style glob pattern to a regex
 | |
|     my $globstr = shift;
 | |
|     my $re = $globstr;
 | |
|     $re =~ s{^["']}{};
 | |
|     $re =~ s{^\.\/}{};
 | |
|     $re =~ s{["']$}{};
 | |
|     $re =~ s{\.}{\\.}g;
 | |
|     $re =~ s{\*\*}{\cx}g;  # ctrl x  = .*?
 | |
|     $re =~ s{\*}{\cy}g;    # ctrl y = [^/]*
 | |
|     $re =~ s{\cx}{.*?}g;
 | |
|     $re =~ s{\cy}{[^/]*}g;
 | |
|     return '^' . $re . '$';
 | |
| } # }}}
 | |
| sub load_json {                              # {{{1
 | |
|     #
 | |
|     # Load a cloc-generated JSON string into %contents
 | |
|     #   $contents{filename}{blank|comment|code|language} = value
 | |
|     # then print in a variety of formats.
 | |
|     #
 | |
|     my ($json_string, ) = @_;
 | |
|     print "-> load_json()\n" if $opt_v > 2;
 | |
| 
 | |
|     my %contents = ();
 | |
|     my $heading = undef;
 | |
|     foreach (split /\n/, $json_string) {
 | |
|         if (/^{?"(.*?)"/) {
 | |
|             $heading = $1;
 | |
|         } else {
 | |
|             if (/^\s+"(.*?)"\s*:\s+(\d+(\.\d+)?)\b/) {
 | |
|                 # numeric value
 | |
|                 $contents{$heading}{$1} = $2;
 | |
|             } elsif (/^\s+"(.*?)"\s*:\s+"(.*?)"/) {
 | |
|                 $contents{$heading}{$1} = $2;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     my $url = $contents{'header'}{'cloc_url'};
 | |
|     my $ver = $contents{'header'}{'cloc_version'};
 | |
|     my $sec = $contents{'header'}{'elapsed_seconds'};
 | |
|     my $n_file = $contents{'header'}{'n_files'};
 | |
|     my $n_line = $contents{'header'}{'n_lines'};
 | |
|     $sec = $sec == 0 ? 1.0e-3 : $sec;
 | |
|     my $header = sprintf "%s v %s T=%.2f s (%.1f files/s, %.1f lines/s)",
 | |
|                           $url, $ver, $sec, $n_file/$sec, $n_line/$sec;
 | |
|     delete $contents{'header'};
 | |
|     delete $contents{'SUM'};
 | |
| 
 | |
|     my @file_list = (sort { $contents{$b}{'code'} <=>
 | |
|                             $contents{$a}{'code'} } keys %contents );
 | |
| #die Dumper(\%contents);
 | |
|     # Determine column widths for output
 | |
|     my $file_len = 0;
 | |
|     my $lang_len = 0;
 | |
|     foreach my $file (keys %contents) {
 | |
|         my $flen = length $file;
 | |
|         my $llen = length $contents{$file}{'language'};
 | |
|         $file_len = $file_len > $flen ? $file_len : $flen;
 | |
|         $lang_len = $lang_len > $llen ? $lang_len : $llen;
 | |
|     }
 | |
|     print "<- load_json()\n" if $opt_v > 2;
 | |
|     return $file_len, $lang_len, $header, %contents;
 | |
| }
 | |
| # 1}}}
 | |
| sub print_format_n {                         # {{{1
 | |
|     # by file with
 | |
|     # format 1 : Language | files | blank | comment | code
 | |
|     # format 2 : Language | files | blank | comment | code | total
 | |
|     # format 3 : File | Language | blank | comment | code
 | |
|     # format 4 : File | blank | comment | code | total
 | |
|     # format 5 : File | Language | blank | comment | code | total
 | |
|     my ($format, $file_len, $lang_len, $header, %contents) = @_;
 | |
|     print "-> print_format_n($format)\n" if $opt_v > 2;
 | |
|     my @prt_lines = ();
 | |
| 
 | |
|     # 8 = characters in "Language"
 | |
|     $lang_len = max(8, $lang_len);
 | |
|     my %str_fmt = (
 | |
|         1 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s\n", $lang_len),
 | |
|         2 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s  %%7s\n", $lang_len),
 | |
|         3 => sprintf("%%-%ds  %%-%ds  %%7s  %%7s  %%7s\n", $file_len, $lang_len),
 | |
|         4 => sprintf("%%-%ds  %%7s  %%7s  %%7s  %%7s\n", $file_len),
 | |
|         5 => sprintf("%%-%ds  %%-%ds  %%7s  %%7s  %%7s  %%7s\n", $file_len, $lang_len),
 | |
|     );
 | |
|     my %val_fmt = (
 | |
|         1 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d\n", $lang_len),
 | |
|         2 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d  %%7d\n", $lang_len),
 | |
|         3 => sprintf("%%-%ds  %%-%ds  %%7d  %%7d  %%7d\n", $file_len, $lang_len),
 | |
|         4 => sprintf("%%-%ds  %%7d  %%7d  %%7d  %%7d\n", $file_len),
 | |
|         5 => sprintf("%%-%ds  %%-%ds  %%7d  %%7d  %%7d  %%7d\n", $file_len, $lang_len),
 | |
|     );
 | |
|     my %language = ();
 | |
|     foreach my $file (keys %contents) {
 | |
|         my $lang = $contents{$file}{'language'};
 | |
|         $language{$lang}{'files'} += 1;
 | |
|         foreach my $category ('blank', 'comment', 'code',) {
 | |
|             $language{$lang}{$category} += $contents{$file}{$category};
 | |
|             $language{$lang}{'total'}   += $contents{$file}{$category};
 | |
|         }
 | |
|     }
 | |
|     my @file_list = (sort { $contents{$b}{'code'} <=>
 | |
|                             $contents{$a}{'code'} } keys %contents );
 | |
|     my @lang_list = (sort { $language{$b}{'code'} <=>
 | |
|                             $language{$a}{'code'} } keys %language );
 | |
| 
 | |
|     my %hyphens = (
 | |
|         1 => "-" x ($lang_len + 4*9),
 | |
|         2 => "-" x ($lang_len + 5*9),
 | |
|         3 => "-" x ($lang_len + $file_len + 2 + 3*9),
 | |
|         4 => "-" x ($file_len + 4*9),
 | |
|         5 => "-" x ($lang_len + $file_len + 2 + 4*9),
 | |
|     );
 | |
|     my %col_headings = (
 | |
|         1 => ["Language", "files", "blank", "comment", "code"],
 | |
|         2 => ["Language", "files", "blank", "comment", "code", "Total"],
 | |
|         3 => ["File", "Language", "blank", "comment", "code"],
 | |
|         4 => ["File", "blank", "comment", "code", "Total"],
 | |
|         5 => ["File", "Language", "blank", "comment", "code", "Total"],
 | |
|     );
 | |
| 
 | |
|     push @prt_lines, "$header\n";
 | |
|     push @prt_lines, "$hyphens{$format}\n";
 | |
|     push @prt_lines, sprintf $str_fmt{$format}, @{$col_headings{$format}};
 | |
|     push @prt_lines, "$hyphens{$format}\n";
 | |
|     my ($n_files, $n_blank, $n_comment, $n_code, $n_total) = (0, 0, 0, 0, 0);
 | |
|     my @out;
 | |
|     if ($format < 3) {
 | |
|         # by language
 | |
|         foreach my $lang (@lang_list) {
 | |
|             my ($nF, $nB, $nCm, $nCo) = ($language{$lang}{'files'},
 | |
|                                          $language{$lang}{'blank'},
 | |
|                                          $language{$lang}{'comment'},
 | |
|                                          $language{$lang}{'code'});
 | |
|             if      ($format == 1) {
 | |
|                 @out = ($lang, $nF, $nB, $nCm, $nCo);
 | |
|             } else {
 | |
|                 @out = ($lang, $nF, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | |
|             }
 | |
|             push @prt_lines, sprintf $val_fmt{$format}, @out;
 | |
|             $n_files   += $nF;
 | |
|             $n_blank   += $nB;
 | |
|             $n_comment += $nCm;
 | |
|             $n_code    += $nCo;
 | |
|             $n_total   += $nB + $nCm + $nCo;
 | |
|         }
 | |
|     } else {
 | |
|         # by file
 | |
|         foreach my $file (@file_list) {
 | |
|             my ($nB, $nCm, $nCo) = ($contents{$file}{'blank'},
 | |
|                                     $contents{$file}{'comment'},
 | |
|                                     $contents{$file}{'code'});
 | |
|             my $lang = $contents{$file}{'language'};
 | |
|             if      ($format == 1) {
 | |
|             } elsif ($format == 3) {
 | |
|                 @out = ($file, $lang, $nB, $nCm, $nCo);
 | |
|             } elsif ($format == 4) {
 | |
|                 @out = ($file, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | |
|             } else {
 | |
|                 @out = ($file, $lang, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
 | |
|             }
 | |
|             push @prt_lines, sprintf $val_fmt{$format}, @out;
 | |
|             $n_blank   += $nB;
 | |
|             $n_comment += $nCm;
 | |
|             $n_code    += $nCo;
 | |
|             $n_total   += $nB + $nCm + $nCo;
 | |
|         }
 | |
|     }
 | |
|     push @prt_lines, "$hyphens{$format}\n";
 | |
|     if (scalar @file_list > 1) {
 | |
|         if      ($format == 1) {
 | |
|             @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code );
 | |
|         } elsif ($format == 2) {
 | |
|             @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code, $n_total );
 | |
|         } elsif ($format == 3) {
 | |
|             @out = ( "SUM", " ", $n_blank, $n_comment, $n_code );
 | |
|         } elsif ($format == 4) {
 | |
|             @out = ( "SUM", $n_blank, $n_comment, $n_code, $n_total );
 | |
|         } else {
 | |
|             @out = ( "SUM", " ", $n_blank, $n_comment, $n_code, $n_total );
 | |
|         }
 | |
|         push @prt_lines, sprintf $val_fmt{$format}, @out;
 | |
|         push @prt_lines, "$hyphens{$format}\n";
 | |
|     }
 | |
|     return @prt_lines;
 | |
|     print "<- print_format_n()\n" if $opt_v > 2;
 | |
| } # 1}}}
 | |
| # really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
 | |
| my %php_files    = ();  # really_is_php()
 | |
| sub really_is_pascal {                       # {{{1
 | |
| # Given filename, returns TRUE if its contents really are Pascal.
 | |
| 
 | |
| # This isn't as obvious as it seems.
 | |
| # Many ".p" files are Perl files
 | |
| # (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
 | |
| # others are C extractions
 | |
| # (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
 | |
| # and some files in linuxconf).
 | |
| # However, test files in "p2c" really are Pascal, for example.
 | |
| 
 | |
| # Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
 | |
| # is actually C code.  The heuristics determine that they're not Pascal,
 | |
| # but because it ends in ".p" it's not counted as C code either.
 | |
| # I believe this is actually correct behavior, because frankly it
 | |
| # looks like it's automatically generated (it's a bitmap expressed as code).
 | |
| # Rather than guess otherwise, we don't include it in a list of
 | |
| # source files.  Let's face it, someone who creates C files ending in ".p"
 | |
| # and expects them to be counted by default as C files in SLOCCount needs
 | |
| # their head examined.  I suggest examining their head
 | |
| # with a sucker rod (see syslogd(8) for more on sucker rods).
 | |
| 
 | |
| # This heuristic counts as Pascal such files such as:
 | |
| #  /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
 | |
| # Which is hand-generated.  We don't count woven documents now anyway,
 | |
| # so this is justifiable.
 | |
| 
 | |
|  my $filename = shift;
 | |
|  chomp($filename);
 | |
| 
 | |
| # The heuristic is as follows: it's Pascal _IF_ it has all of the following
 | |
| # (ignoring {...} and (*...*) comments):
 | |
| # 1. "^..program NAME" or "^..unit NAME",
 | |
| # 2. "procedure", "function", "^..interface", or "^..implementation",
 | |
| # 3. a "begin", and
 | |
| # 4. it ends with "end.",
 | |
| #
 | |
| # Or it has all of the following:
 | |
| # 1. "^..module NAME" and
 | |
| # 2. it ends with "end.".
 | |
| #
 | |
| # Or it has all of the following:
 | |
| # 1. "^..program NAME",
 | |
| # 2. a "begin", and
 | |
| # 3. it ends with "end.".
 | |
| #
 | |
| # The "end." requirements in particular filter out non-Pascal.
 | |
| #
 | |
| # Note (jgb): this does not detect Pascal main files in fpc, like
 | |
| # fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
 | |
| # it
 | |
| 
 | |
|  my $is_pascal = 0;      # Value to determine.
 | |
| 
 | |
|  my $has_program = 0;
 | |
|  my $has_unit = 0;
 | |
|  my $has_module = 0;
 | |
|  my $has_procedure_or_function = 0;
 | |
|  my $found_begin = 0;
 | |
|  my $found_terminating_end = 0;
 | |
|  my $has_begin = 0;
 | |
| 
 | |
|  my $PASCAL_FILE = open_file('<', $filename, 0);
 | |
|  die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
 | |
|  while(<$PASCAL_FILE>) {
 | |
|    s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
 | |
|    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
 | |
|    if (m/\bprogram\s+[A-Za-z]/i)  {$has_program=1;}
 | |
|    if (m/\bunit\s+[A-Za-z]/i)     {$has_unit=1;}
 | |
|    if (m/\bmodule\s+[A-Za-z]/i)   {$has_module=1;}
 | |
|    if (m/\bprocedure\b/i)         { $has_procedure_or_function = 1; }
 | |
|    if (m/\bfunction\b/i)          { $has_procedure_or_function = 1; }
 | |
|    if (m/^\s*interface\s+/i)      { $has_procedure_or_function = 1; }
 | |
|    if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
 | |
|    if (m/\bbegin\b/i) { $has_begin = 1; }
 | |
|    # Originally I said:
 | |
|    # "This heuristic fails if there are multi-line comments after
 | |
|    # "end."; I haven't seen that in real Pascal programs:"
 | |
|    # But jgb found there are a good quantity of them in Debian, specially in
 | |
|    # fpc (at the end of a lot of files there is a multiline comment
 | |
|    # with the changelog for the file).
 | |
|    # Therefore, assume Pascal if "end." appears anywhere in the file.
 | |
|    if (m/end\.\s*$/i) {$found_terminating_end = 1;}
 | |
| #   elsif (m/\S/) {$found_terminating_end = 0;}
 | |
|  }
 | |
|  close($PASCAL_FILE);
 | |
| 
 | |
|  # Okay, we've examined the entire file looking for clues;
 | |
|  # let's use those clues to determine if it's really Pascal:
 | |
| 
 | |
|  if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
 | |
|      $has_begin && $found_terminating_end ) ||
 | |
|       ( $has_module && $found_terminating_end ) ||
 | |
|       ( $has_program && $has_begin && $found_terminating_end ) )
 | |
|           {$is_pascal = 1;}
 | |
| 
 | |
|  return $is_pascal;
 | |
| } # 1}}}
 | |
| sub really_is_incpascal {                    # {{{1
 | |
| # Given filename, returns TRUE if its contents really are Pascal.
 | |
| # For .inc files (mainly seen in fpc)
 | |
| 
 | |
|  my $filename = shift;
 | |
|  chomp($filename);
 | |
| 
 | |
| # The heuristic is as follows: it is Pascal if any of the following:
 | |
| # 1. really_is_pascal returns true
 | |
| # 2. Any usual reserved word is found (program, unit, const, begin...)
 | |
| 
 | |
|  # If the general routine for Pascal files works, we have it
 | |
|  if (really_is_pascal($filename)) {
 | |
|    return 1;
 | |
|  }
 | |
| 
 | |
|  my $is_pascal = 0;      # Value to determine.
 | |
|  my $found_begin = 0;
 | |
| 
 | |
|  my $PASCAL_FILE = open_file('<', $filename, 0);
 | |
|  die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
 | |
|  while(<$PASCAL_FILE>) {
 | |
|    s/\{.*?\}//g;  # Ignore {...} comments on this line; imperfect, but effective.
 | |
|    s/\(\*.*?\*\)//g;  # Ignore (*...*) comments on this line; imperfect, but effective.
 | |
|    if (m/\bprogram\s+[A-Za-z]/i)  {$is_pascal=1;}
 | |
|    if (m/\bunit\s+[A-Za-z]/i)     {$is_pascal=1;}
 | |
|    if (m/\bmodule\s+[A-Za-z]/i)   {$is_pascal=1;}
 | |
|    if (m/\bprocedure\b/i)         {$is_pascal = 1; }
 | |
|    if (m/\bfunction\b/i)          {$is_pascal = 1; }
 | |
|    if (m/^\s*interface\s+/i)      {$is_pascal = 1; }
 | |
|    if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
 | |
|    if (m/\bconstant\s+/i)         {$is_pascal=1;}
 | |
|    if (m/\bbegin\b/i) { $found_begin = 1; }
 | |
|    if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;}
 | |
|    if ($is_pascal) {
 | |
|      last;
 | |
|    }
 | |
|  }
 | |
| 
 | |
|  close($PASCAL_FILE);
 | |
|  return $is_pascal;
 | |
| } # 1}}}
 | |
| sub really_is_php {                          # {{{1
 | |
| # Given filename, returns TRUE if its contents really is php.
 | |
| 
 | |
|  my $filename = shift;
 | |
|  chomp($filename);
 | |
| 
 | |
|  my $is_php = 0;      # Value to determine.
 | |
|  # Need to find a matching pair of surrounds, with ending after beginning:
 | |
|  my $normal_surround = 0;  # <?; bit 0 = <?, bit 1 = ?>
 | |
|  my $script_surround = 0;  # <script..>; bit 0 = <script language="php">
 | |
|  my $asp_surround = 0;     # <%; bit 0 = <%, bit 1 = %>
 | |
| 
 | |
|  # Return cached result, if available:
 | |
|  if ($php_files{$filename}) { return $php_files{$filename};}
 | |
| 
 | |
|  my $PHP_FILE = open_file('<', $filename, 0);
 | |
|  die "Can't open $filename to determine if it's php.\n" if !defined $PHP_FILE;
 | |
|  while(<$PHP_FILE>) {
 | |
|    if (m/\<\?/)                           { $normal_surround |= 1; }
 | |
|    if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
 | |
|    if (m/\<script.*language="?php"?/i)    { $script_surround |= 1; }
 | |
|    if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
 | |
|    if (m/\<\%/)                           { $asp_surround |= 1; }
 | |
|    if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
 | |
|  }
 | |
|  close($PHP_FILE);
 | |
| 
 | |
|  if ( ($normal_surround == 3) || ($script_surround == 3) ||
 | |
|       ($asp_surround == 3)) {
 | |
|    $is_php = 1;
 | |
|  }
 | |
| 
 | |
|  $php_files{$filename} = $is_php; # Store result in cache.
 | |
| 
 | |
|  return $is_php;
 | |
| } # 1}}}
 | |
| # vendored modules
 | |
| sub Install_Regexp_Common {                  # {{{1
 | |
|     # Installs portions of Damian Conway's & Abigail's Regexp::Common
 | |
|     # module, version 2017060201 into a temporary directory for the
 | |
|     # duration of this run.
 | |
|     my %Regexp_Common_Contents = ();
 | |
| $Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
 | |
| package Regexp::Common;
 | |
| 
 | |
| use 5.10.0;
 | |
| use strict;
 | |
| 
 | |
| use warnings;
 | |
| no  warnings 'syntax';
 | |
| 
 | |
| our $VERSION = '2017060201';
 | |
| our %RE;
 | |
| our %sub_interface;
 | |
| our $AUTOLOAD;
 | |
| 
 | |
| 
 | |
| sub _croak {
 | |
|     require Carp;
 | |
|     goto &Carp::croak;
 | |
| }
 | |
| 
 | |
| sub _carp {
 | |
|     require Carp;
 | |
|     goto &Carp::carp;
 | |
| }
 | |
| 
 | |
| sub new {
 | |
|     my ($class, @data) = @_;
 | |
|     my %self;
 | |
|     tie %self, $class, @data;
 | |
|     return \%self;
 | |
| }
 | |
| 
 | |
| sub TIEHASH {
 | |
|     my ($class, @data) = @_;
 | |
|     bless \@data, $class;
 | |
| }
 | |
| 
 | |
| sub FETCH {
 | |
|     my ($self, $extra) = @_;
 | |
|     return bless ref($self)->new(@$self, $extra), ref($self);
 | |
| }
 | |
| 
 | |
| my %imports = map {$_ => "Regexp::Common::$_"}
 | |
|               qw /balanced CC     comment   delimited lingua list
 | |
|                   net      number profanity SEN       URI    whitespace
 | |
|                   zip/;
 | |
| 
 | |
| sub import {
 | |
|     shift;  # Shift off the class.
 | |
|     tie %RE, __PACKAGE__;
 | |
|     {
 | |
|         no strict 'refs';
 | |
|         *{caller() . "::RE"} = \%RE;
 | |
|     }
 | |
| 
 | |
|     my $saw_import;
 | |
|     my $no_defaults;
 | |
|     my %exclude;
 | |
|     foreach my $entry (grep {!/^RE_/} @_) {
 | |
|         if ($entry eq 'pattern') {
 | |
|             no strict 'refs';
 | |
|             *{caller() . "::pattern"} = \&pattern;
 | |
|             next;
 | |
|         }
 | |
|         # This used to prevent $; from being set. We still recognize it,
 | |
|         # but we won't do anything.
 | |
|         if ($entry eq 'clean') {
 | |
|             next;
 | |
|         }
 | |
|         if ($entry eq 'no_defaults') {
 | |
|             $no_defaults ++;
 | |
|             next;
 | |
|         }
 | |
|         if (my $module = $imports {$entry}) {
 | |
|             $saw_import ++;
 | |
|             eval "require $module;";
 | |
|             die $@ if $@;
 | |
|             next;
 | |
|         }
 | |
|         if ($entry =~ /^!(.*)/ && $imports {$1}) {
 | |
|             $exclude {$1} ++;
 | |
|             next;
 | |
|         }
 | |
|         # As a last resort, try to load the argument.
 | |
|         my $module = $entry =~ /^Regexp::Common/
 | |
|                             ? $entry
 | |
|                             : "Regexp::Common::" . $entry;
 | |
|         eval "require $module;";
 | |
|         die $@ if $@;
 | |
|     }
 | |
| 
 | |
|     unless ($saw_import || $no_defaults) {
 | |
|         foreach my $module (values %imports) {
 | |
|             next if $exclude {$module};
 | |
|             eval "require $module;";
 | |
|             die $@ if $@;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     my %exported;
 | |
|     foreach my $entry (grep {/^RE_/} @_) {
 | |
|         if ($entry =~ /^RE_(\w+_)?ALL$/) {
 | |
|             my $m  = defined $1 ? $1 : "";
 | |
|             my $re = qr /^RE_${m}.*$/;
 | |
|             while (my ($sub, $interface) = each %sub_interface) {
 | |
|                 next if $exported {$sub};
 | |
|                 next unless $sub =~ /$re/;
 | |
|                 {
 | |
|                     no strict 'refs';
 | |
|                     *{caller() . "::$sub"} = $interface;
 | |
|                 }
 | |
|                 $exported {$sub} ++;
 | |
|             }
 | |
|         }
 | |
|         else {
 | |
|             next if $exported {$entry};
 | |
|             _croak "Can't export unknown subroutine &$entry"
 | |
|                 unless $sub_interface {$entry};
 | |
|             {
 | |
|                 no strict 'refs';
 | |
|                 *{caller() . "::$entry"} = $sub_interface {$entry};
 | |
|             }
 | |
|             $exported {$entry} ++;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
 | |
| 
 | |
| sub DESTROY {}
 | |
| 
 | |
| my %cache;
 | |
| 
 | |
| my $fpat = qr/^(-\w+)/;
 | |
| 
 | |
| sub _decache {
 | |
|         my @args = @{tied %{$_[0]}};
 | |
|         my @nonflags = grep {!/$fpat/} @args;
 | |
|         my $cache = get_cache(@nonflags);
 | |
|         _croak "Can't create unknown regex: \$RE{"
 | |
|             . join("}{",@args) . "}"
 | |
|                 unless exists $cache->{__VAL__};
 | |
|         _croak "Perl $] does not support the pattern "
 | |
|             . "\$RE{" . join("}{",@args)
 | |
|             . "}.\nYou need Perl $cache->{__VAL__}{version} or later"
 | |
|                 unless ($cache->{__VAL__}{version}||0) <= $];
 | |
|         my %flags = ( %{$cache->{__VAL__}{default}},
 | |
|                       map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
 | |
|                           : /$fpat/           ? ($1 => undef)
 | |
|                           :                     ()
 | |
|                           } @args);
 | |
|         $cache->{__VAL__}->_clone_with(\@args, \%flags);
 | |
| }
 | |
| 
 | |
| use overload q{""} => \&_decache;
 | |
| 
 | |
| 
 | |
| sub get_cache {
 | |
|         my $cache = \%cache;
 | |
|         foreach (@_) {
 | |
|                 $cache = $cache->{$_}
 | |
|                       || ($cache->{$_} = {});
 | |
|         }
 | |
|         return $cache;
 | |
| }
 | |
| 
 | |
| sub croak_version {
 | |
|         my ($entry, @args) = @_;
 | |
| }
 | |
| 
 | |
| sub pattern {
 | |
|         my %spec = @_;
 | |
|         _croak 'pattern() requires argument: name => [ @list ]'
 | |
|                 unless $spec{name} && ref $spec{name} eq 'ARRAY';
 | |
|         _croak 'pattern() requires argument: create => $sub_ref_or_string'
 | |
|                 unless $spec{create};
 | |
| 
 | |
|         if (ref $spec{create} ne "CODE") {
 | |
|                 my $fixed_str = "$spec{create}";
 | |
|                 $spec{create} = sub { $fixed_str }
 | |
|         }
 | |
| 
 | |
|         my @nonflags;
 | |
|         my %default;
 | |
|         foreach ( @{$spec{name}} ) {
 | |
|                 if (/$fpat=(.*)/) {
 | |
|                         $default{$1} = $2;
 | |
|                 }
 | |
|                 elsif (/$fpat\s*$/) {
 | |
|                         $default{$1} = undef;
 | |
|                 }
 | |
|                 else {
 | |
|                         push @nonflags, $_;
 | |
|                 }
 | |
|         }
 | |
| 
 | |
|         my $entry = get_cache(@nonflags);
 | |
| 
 | |
|         if ($entry->{__VAL__}) {
 | |
|                 _carp "Overriding \$RE{"
 | |
|                    . join("}{",@nonflags)
 | |
|                    . "}";
 | |
|         }
 | |
| 
 | |
|         $entry->{__VAL__} = bless {
 | |
|                                 create  => $spec{create},
 | |
|                                 match   => $spec{match} || \&generic_match,
 | |
|                                 subs    => $spec{subs}  || \&generic_subs,
 | |
|                                 version => $spec{version},
 | |
|                                 default => \%default,
 | |
|                             }, 'Regexp::Common::Entry';
 | |
| 
 | |
|         foreach (@nonflags) {s/\W/X/g}
 | |
|         my $subname = "RE_" . join ("_", @nonflags);
 | |
|         $sub_interface{$subname} = sub {
 | |
|                 push @_ => undef if @_ % 2;
 | |
|                 my %flags = @_;
 | |
|                 my $pat = $spec{create}->($entry->{__VAL__},
 | |
|                                {%default, %flags}, \@nonflags);
 | |
|                 if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
 | |
|                 else { $pat =~ s/\Q(?k:/(?:/g; }
 | |
|                 return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
 | |
|         };
 | |
| 
 | |
|         return 1;
 | |
| }
 | |
| 
 | |
| sub generic_match {$_ [1] =~  /$_[0]/}
 | |
| sub generic_subs  {$_ [1] =~ s/$_[0]/$_[2]/}
 | |
| 
 | |
| sub matches {
 | |
|         my ($self, $str) = @_;
 | |
|         my $entry = $self -> _decache;
 | |
|         $entry -> {match} -> ($entry, $str);
 | |
| }
 | |
| 
 | |
| sub subs {
 | |
|         my ($self, $str, $newstr) = @_;
 | |
|         my $entry = $self -> _decache;
 | |
|         $entry -> {subs} -> ($entry, $str, $newstr);
 | |
|         return $str;
 | |
| }
 | |
| 
 | |
| 
 | |
| package Regexp::Common::Entry;
 | |
| # use Carp;
 | |
| 
 | |
| use overload
 | |
|     q{""} => sub {
 | |
|         my ($self) = @_;
 | |
|         my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
 | |
|         if (exists $self->{flags}{-keep}) {
 | |
|             $pat =~ s/\Q(?k:/(/g;
 | |
|         }
 | |
|         else {
 | |
|             $pat =~ s/\Q(?k:/(?:/g;
 | |
|         }
 | |
|         if (exists $self->{flags}{-i})   { $pat = "(?i)$pat" }
 | |
|         return $pat;
 | |
|     };
 | |
| 
 | |
| sub _clone_with {
 | |
|     my ($self, $args, $flags) = @_;
 | |
|     bless { %$self, args=>$args, flags=>$flags }, ref $self;
 | |
| }
 | |
| 
 | |
| 1;
 | |
| 
 | |
| __END__
 | |
| 
 | |
| =pod
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| Regexp::Common - Provide commonly requested regular expressions
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
|  # STANDARD USAGE
 | |
| 
 | |
|  use Regexp::Common;
 | |
| 
 | |
|  while (<>) {
 | |
|      /$RE{num}{real}/               and print q{a number};
 | |
|      /$RE{quoted}/                  and print q{a ['"`] quoted string};
 | |
|     m[$RE{delimited}{-delim=>'/'}]  and print q{a /.../ sequence};
 | |
|      /$RE{balanced}{-parens=>'()'}/ and print q{balanced parentheses};
 | |
|      /$RE{profanity}/               and print q{a #*@%-ing word};
 | |
|  }
 | |
| 
 | |
| 
 | |
|  # SUBROUTINE-BASED INTERFACE
 | |
| 
 | |
|  use Regexp::Common 'RE_ALL';
 | |
| 
 | |
|  while (<>) {
 | |
|      $_ =~ RE_num_real()              and print q{a number};
 | |
|      $_ =~ RE_quoted()                and print q{a ['"`] quoted string};
 | |
|      $_ =~ RE_delimited(-delim=>'/')  and print q{a /.../ sequence};
 | |
|      $_ =~ RE_balanced(-parens=>'()'} and print q{balanced parentheses};
 | |
|      $_ =~ RE_profanity()             and print q{a #*@%-ing word};
 | |
|  }
 | |
| 
 | |
| 
 | |
|  # IN-LINE MATCHING...
 | |
| 
 | |
|  if ( $RE{num}{int}->matches($text) ) {...}
 | |
| 
 | |
| 
 | |
|  # ...AND SUBSTITUTION
 | |
| 
 | |
|  my $cropped = $RE{ws}{crop}->subs($uncropped);
 | |
| 
 | |
| 
 | |
|  # ROLL-YOUR-OWN PATTERNS
 | |
| 
 | |
|  use Regexp::Common 'pattern';
 | |
| 
 | |
|  pattern name   => ['name', 'mine'],
 | |
|          create => '(?i:J[.]?\s+A[.]?\s+Perl-Hacker)',
 | |
|          ;
 | |
| 
 | |
|  my $name_matcher = $RE{name}{mine};
 | |
| 
 | |
|  pattern name    => [ 'lineof', '-char=_' ],
 | |
|          create  => sub {
 | |
|                         my $flags = shift;
 | |
|                         my $char = quotemeta $flags->{-char};
 | |
|                         return '(?:^$char+$)';
 | |
|                     },
 | |
|          match   => sub {
 | |
|                         my ($self, $str) = @_;
 | |
|                         return $str !~ /[^$self->{flags}{-char}]/;
 | |
|                     },
 | |
|          subs   => sub {
 | |
|                         my ($self, $str, $replacement) = @_;
 | |
|                         $_[1] =~ s/^$self->{flags}{-char}+$//g;
 | |
|                    },
 | |
|          ;
 | |
| 
 | |
|  my $asterisks = $RE{lineof}{-char=>'*'};
 | |
| 
 | |
|  # DECIDING WHICH PATTERNS TO LOAD.
 | |
| 
 | |
|  use Regexp::Common qw /comment number/;  # Comment and number patterns.
 | |
|  use Regexp::Common qw /no_defaults/;     # Don't load any patterns.
 | |
|  use Regexp::Common qw /!delimited/;      # All, but delimited patterns.
 | |
| 
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| By default, this module exports a single hash (C<%RE>) that stores or generates
 | |
| commonly needed regular expressions (see L<"List of available patterns">).
 | |
| 
 | |
| There is an alternative, subroutine-based syntax described in
 | |
| L<"Subroutine-based interface">.
 | |
| 
 | |
| 
 | |
| =head2 General syntax for requesting patterns
 | |
| 
 | |
| To access a particular pattern, C<%RE> is treated as a hierarchical hash of
 | |
| hashes (of hashes...), with each successive key being an identifier. For
 | |
| example, to access the pattern that matches real numbers, you
 | |
| specify:
 | |
| 
 | |
|         $RE{num}{real}
 | |
| 
 | |
| and to access the pattern that matches integers:
 | |
| 
 | |
|         $RE{num}{int}
 | |
| 
 | |
| Deeper layers of the hash are used to specify I<flags>: arguments that
 | |
| modify the resulting pattern in some way. The keys used to access these
 | |
| layers are prefixed with a minus sign and may have a value; if a value
 | |
| is given, it's done by using a multidimensional key.
 | |
| For example, to access the pattern that
 | |
| matches base-2 real numbers with embedded commas separating
 | |
| groups of three digits (e.g. 10,101,110.110101101):
 | |
| 
 | |
|         $RE{num}{real}{-base => 2}{-sep => ','}{-group => 3}
 | |
| 
 | |
| Through the magic of Perl, these flag layers may be specified in any order
 | |
| (and even interspersed through the identifier keys!)
 | |
| so you could get the same pattern with:
 | |
| 
 | |
|         $RE{num}{real}{-sep => ','}{-group => 3}{-base => 2}
 | |
| 
 | |
| or:
 | |
| 
 | |
|         $RE{num}{-base => 2}{real}{-group => 3}{-sep => ','}
 | |
| 
 | |
| or even:
 | |
| 
 | |
|         $RE{-base => 2}{-group => 3}{-sep => ','}{num}{real}
 | |
| 
 | |
| etc.
 | |
| 
 | |
| Note, however, that the relative order of amongst the identifier keys
 | |
| I<is> significant. That is:
 | |
| 
 | |
|         $RE{list}{set}
 | |
| 
 | |
| would not be the same as:
 | |
| 
 | |
|         $RE{set}{list}
 | |
| 
 | |
| =head2 Flag syntax
 | |
| 
 | |
| In versions prior to 2.113, flags could also be written as
 | |
| C<{"-flag=value"}>. This no longer works, although C<{"-flag$;value"}>
 | |
| still does. However, C<< {-flag => 'value'} >> is the preferred syntax.
 | |
| 
 | |
| =head2 Universal flags
 | |
| 
 | |
| Normally, flags are specific to a single pattern.
 | |
| However, there is two flags that all patterns may specify.
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item C<-keep>
 | |
| 
 | |
| By default, the patterns provided by C<%RE> contain no capturing
 | |
| parentheses. However, if the C<-keep> flag is specified (it requires
 | |
| no value) then any significant substrings that the pattern matches
 | |
| are captured. For example:
 | |
| 
 | |
|         if ($str =~ $RE{num}{real}{-keep}) {
 | |
|                 $number   = $1;
 | |
|                 $whole    = $3;
 | |
|                 $decimals = $5;
 | |
|         }
 | |
| 
 | |
| Special care is needed if a "kept" pattern is interpolated into a
 | |
| larger regular expression, as the presence of other capturing
 | |
| parentheses is likely to change the "number variables" into which significant
 | |
| substrings are saved.
 | |
| 
 | |
| See also L<"Adding new regular expressions">, which describes how to create
 | |
| new patterns with "optional" capturing brackets that respond to C<-keep>.
 | |
| 
 | |
| =item C<-i>
 | |
| 
 | |
| Some patterns or subpatterns only match lowercase or uppercase letters.
 | |
| If one wants the do case insensitive matching, one option is to use
 | |
| the C</i> regexp modifier, or the special sequence C<(?i)>. But if the
 | |
| functional interface is used, one does not have this option. The
 | |
| C<-i> switch solves this problem; by using it, the pattern will do
 | |
| case insensitive matching.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head2 OO interface and inline matching/substitution
 | |
| 
 | |
| The patterns returned from C<%RE> are objects, so rather than writing:
 | |
| 
 | |
|         if ($str =~ /$RE{some}{pattern}/ ) {...}
 | |
| 
 | |
| you can write:
 | |
| 
 | |
|         if ( $RE{some}{pattern}->matches($str) ) {...}
 | |
| 
 | |
| For matching this would seem to have no great advantage apart from readability
 | |
| (but see below).
 | |
| 
 | |
| For substitutions, it has other significant benefits. Frequently you want to
 | |
| perform a substitution on a string without changing the original. Most people
 | |
| use this:
 | |
| 
 | |
|         $changed = $original;
 | |
|         $changed =~ s/$RE{some}{pattern}/$replacement/;
 | |
| 
 | |
| The more adept use:
 | |
| 
 | |
|         ($changed = $original) =~ s/$RE{some}{pattern}/$replacement/;
 | |
| 
 | |
| Regexp::Common allows you do write this:
 | |
| 
 | |
|         $changed = $RE{some}{pattern}->subs($original=>$replacement);
 | |
| 
 | |
| Apart from reducing precedence-angst, this approach has the added
 | |
| advantages that the substitution behaviour can be optimized from the
 | |
| regular expression, and the replacement string can be provided by
 | |
| default (see L<"Adding new regular expressions">).
 | |
| 
 | |
| For example, in the implementation of this substitution:
 | |
| 
 | |
|         $cropped = $RE{ws}{crop}->subs($uncropped);
 | |
| 
 | |
| the default empty string is provided automatically, and the substitution is
 | |
| optimized to use:
 | |
| 
 | |
|         $uncropped =~ s/^\s+//;
 | |
|         $uncropped =~ s/\s+$//;
 | |
| 
 | |
| rather than:
 | |
| 
 | |
|         $uncropped =~ s/^\s+|\s+$//g;
 | |
| 
 | |
| 
 | |
| =head2 Subroutine-based interface
 | |
| 
 | |
| The hash-based interface was chosen because it allows regexes to be
 | |
| effortlessly interpolated, and because it also allows them to be
 | |
| "curried". For example:
 | |
| 
 | |
|         my $num = $RE{num}{int};
 | |
| 
 | |
|         my $command    = $num->{-sep=>','}{-group=>3};
 | |
|         my $duodecimal = $num->{-base=>12};
 | |
| 
 | |
| 
 | |
| However, the use of tied hashes does make the access to Regexp::Common
 | |
| patterns slower than it might otherwise be. In contexts where impatience
 | |
| overrules laziness, Regexp::Common provides an additional
 | |
| subroutine-based interface.
 | |
| 
 | |
| For each (sub-)entry in the C<%RE> hash (C<$RE{key1}{key2}{etc}>), there
 | |
| is a corresponding exportable subroutine: C<RE_key1_key2_etc()>. The name of
 | |
| each subroutine is the underscore-separated concatenation of the I<non-flag>
 | |
| keys that locate the same pattern in C<%RE>. Flags are passed to the subroutine
 | |
| in its argument list. Thus:
 | |
| 
 | |
|         use Regexp::Common qw( RE_ws_crop RE_num_real RE_profanity );
 | |
| 
 | |
|         $str =~ RE_ws_crop() and die "Surrounded by whitespace";
 | |
| 
 | |
|         $str =~ RE_num_real(-base=>8, -sep=>" ") or next;
 | |
| 
 | |
|         $offensive = RE_profanity(-keep);
 | |
|         $str =~ s/$offensive/$bad{$1}++; "<expletive deleted>"/ge;
 | |
| 
 | |
| Note that, unlike the hash-based interface (which returns objects), these
 | |
| subroutines return ordinary C<qr>'d regular expressions. Hence they do not
 | |
| curry, nor do they provide the OO match and substitution inlining described
 | |
| in the previous section.
 | |
| 
 | |
| It is also possible to export subroutines for all available patterns like so:
 | |
| 
 | |
|         use Regexp::Common 'RE_ALL';
 | |
| 
 | |
| Or you can export all subroutines with a common prefix of keys like so:
 | |
| 
 | |
|         use Regexp::Common 'RE_num_ALL';
 | |
| 
 | |
| which will export C<RE_num_int> and C<RE_num_real> (and if you have
 | |
| create more patterns who have first key I<num>, those will be exported
 | |
| as well). In general, I<RE_key1_..._keyn_ALL> will export all subroutines
 | |
| whose pattern names have first keys I<key1> ... I<keyn>.
 | |
| 
 | |
| 
 | |
| =head2 Adding new regular expressions
 | |
| 
 | |
| You can add your own regular expressions to the C<%RE> hash at run-time,
 | |
| using the exportable C<pattern> subroutine. It expects a hash-like list of
 | |
| key/value pairs that specify the behaviour of the pattern. The various
 | |
| possible argument pairs are:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item C<name =E<gt> [ @list ]>
 | |
| 
 | |
| A required argument that specifies the name of the pattern, and any
 | |
| flags it may take, via a reference to a list of strings. For example:
 | |
| 
 | |
|          pattern name => [qw( line of -char )],
 | |
|                  # other args here
 | |
|                  ;
 | |
| 
 | |
| This specifies an entry C<$RE{line}{of}>, which may take a C<-char> flag.
 | |
| 
 | |
| Flags may also be specified with a default value, which is then used whenever
 | |
| the flag is specified without an explicit value (but not when the flag is
 | |
| omitted). For example:
 | |
| 
 | |
|          pattern name => [qw( line of -char=_ )],
 | |
|                  # default char is '_'
 | |
|                  # other args here
 | |
|                  ;
 | |
| 
 | |
| 
 | |
| =item C<create =E<gt> $sub_ref_or_string>
 | |
| 
 | |
| A required argument that specifies either a string that is to be returned
 | |
| as the pattern:
 | |
| 
 | |
|         pattern name    => [qw( line of underscores )],
 | |
|                 create  => q/(?:^_+$)/
 | |
|                 ;
 | |
| 
 | |
| or a reference to a subroutine that will be called to create the pattern:
 | |
| 
 | |
|         pattern name    => [qw( line of -char=_ )],
 | |
|                 create  => sub {
 | |
|                                 my ($self, $flags) = @_;
 | |
|                                 my $char = quotemeta $flags->{-char};
 | |
|                                 return '(?:^$char+$)';
 | |
|                             },
 | |
|                 ;
 | |
| 
 | |
| If the subroutine version is used, the subroutine will be called with
 | |
| three arguments: a reference to the pattern object itself, a reference
 | |
| to a hash containing the flags and their values,
 | |
| and a reference to an array containing the non-flag keys.
 | |
| 
 | |
| Whatever the subroutine returns is stringified as the pattern.
 | |
| 
 | |
| No matter how the pattern is created, it is immediately postprocessed to
 | |
| include or exclude capturing parentheses (according to the value of the
 | |
| C<-keep> flag). To specify such "optional" capturing parentheses within
 | |
| the regular expression associated with C<create>, use the notation
 | |
| C<(?k:...)>. Any parentheses of this type will be converted to C<(...)>
 | |
| when the C<-keep> flag is specified, or C<(?:...)> when it is not.
 | |
| It is a Regexp::Common convention that the outermost capturing parentheses
 | |
| always capture the entire pattern, but this is not enforced.
 | |
| 
 | |
| 
 | |
| =item C<match =E<gt> $sub_ref>
 | |
| 
 | |
| An optional argument that specifies a subroutine that is to be called when
 | |
| the C<$RE{...}-E<gt>matches(...)> method of this pattern is invoked.
 | |
| 
 | |
| The subroutine should expect two arguments: a reference to the pattern object
 | |
| itself, and the string to be matched against.
 | |
| 
 | |
| It should return the same types of values as a C<m/.../> does.
 | |
| 
 | |
|      pattern name    => [qw( line of -char )],
 | |
|              create  => sub {...},
 | |
|              match   => sub {
 | |
|                              my ($self, $str) = @_;
 | |
|                              $str !~ /[^$self->{flags}{-char}]/;
 | |
|                         },
 | |
|              ;
 | |
| 
 | |
| 
 | |
| =item C<subs =E<gt> $sub_ref>
 | |
| 
 | |
| An optional argument that specifies a subroutine that is to be called when
 | |
| the C<$RE{...}-E<gt>subs(...)> method of this pattern is invoked.
 | |
| 
 | |
| The subroutine should expect three arguments: a reference to the pattern object
 | |
| itself, the string to be changed, and the value to be substituted into it.
 | |
| The third argument may be C<undef>, indicating the default substitution is
 | |
| required.
 | |
| 
 | |
| The subroutine should return the same types of values as an C<s/.../.../> does.
 | |
| 
 | |
| For example:
 | |
| 
 | |
|      pattern name    => [ 'lineof', '-char=_' ],
 | |
|              create  => sub {...},
 | |
|              subs    => sub {
 | |
|                           my ($self, $str, $ignore_replacement) = @_;
 | |
|                           $_[1] =~ s/^$self->{flags}{-char}+$//g;
 | |
|                         },
 | |
|              ;
 | |
| 
 | |
| Note that such a subroutine will almost always need to modify C<$_[1]> directly.
 | |
| 
 | |
| 
 | |
| =item C<version =E<gt> $minimum_perl_version>
 | |
| 
 | |
| If this argument is given, it specifies the minimum version of perl required
 | |
| to use the new pattern. Attempts to use the pattern with earlier versions of
 | |
| perl will generate a fatal diagnostic.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head2 Loading specific sets of patterns.
 | |
| 
 | |
| By default, all the sets of patterns listed below are made available.
 | |
| However, it is possible to indicate which sets of patterns should
 | |
| be made available - the wanted sets should be given as arguments to
 | |
| C<use>. Alternatively, it is also possible to indicate which sets of
 | |
| patterns should not be made available - those sets will be given as
 | |
| argument to the C<use> statement, but are preceded with an exclaimation
 | |
| mark. The argument I<no_defaults> indicates none of the default patterns
 | |
| should be made available. This is useful for instance if all you want
 | |
| is the C<pattern()> subroutine.
 | |
| 
 | |
| Examples:
 | |
| 
 | |
|  use Regexp::Common qw /comment number/;  # Comment and number patterns.
 | |
|  use Regexp::Common qw /no_defaults/;     # Don't load any patterns.
 | |
|  use Regexp::Common qw /!delimited/;      # All, but delimited patterns.
 | |
| 
 | |
| It's also possible to load your own set of patterns. If you have a
 | |
| module C<Regexp::Common::my_patterns> that makes patterns available,
 | |
| you can have it made available with
 | |
| 
 | |
|  use Regexp::Common qw /my_patterns/;
 | |
| 
 | |
| Note that the default patterns will still be made available - only if
 | |
| you use I<no_defaults>, or mention one of the default sets explicitly,
 | |
| the non mentioned defaults aren't made available.
 | |
| 
 | |
| =head2 List of available patterns
 | |
| 
 | |
| The patterns listed below are currently available. Each set of patterns
 | |
| has its own manual page describing the details. For each pattern set
 | |
| named I<name>, the manual page I<Regexp::Common::name> describes the
 | |
| details.
 | |
| 
 | |
| Currently available are:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item Regexp::Common::balanced
 | |
| 
 | |
| Provides regexes for strings with balanced parenthesized delimiters.
 | |
| 
 | |
| =item Regexp::Common::comment
 | |
| 
 | |
| Provides regexes for comments of various languages (43 languages
 | |
| currently).
 | |
| 
 | |
| =item Regexp::Common::delimited
 | |
| 
 | |
| Provides regexes for delimited strings.
 | |
| 
 | |
| =item Regexp::Common::lingua
 | |
| 
 | |
| Provides regexes for palindromes.
 | |
| 
 | |
| =item Regexp::Common::list
 | |
| 
 | |
| Provides regexes for lists.
 | |
| 
 | |
| =item Regexp::Common::net
 | |
| 
 | |
| Provides regexes for IPv4, IPv6, and MAC addresses.
 | |
| 
 | |
| =item Regexp::Common::number
 | |
| 
 | |
| Provides regexes for numbers (integers and reals).
 | |
| 
 | |
| =item Regexp::Common::profanity
 | |
| 
 | |
| Provides regexes for profanity.
 | |
| 
 | |
| =item Regexp::Common::whitespace
 | |
| 
 | |
| Provides regexes for leading and trailing whitespace.
 | |
| 
 | |
| =item Regexp::Common::zip
 | |
| 
 | |
| Provides regexes for zip codes.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head2 Forthcoming patterns and features
 | |
| 
 | |
| Future releases of the module will also provide patterns for the following:
 | |
| 
 | |
|         * email addresses
 | |
|         * HTML/XML tags
 | |
|         * more numerical matchers,
 | |
|         * mail headers (including multiline ones),
 | |
|         * more URLS
 | |
|         * telephone numbers of various countries
 | |
|         * currency (universal 3 letter format, Latin-1, currency names)
 | |
|         * dates
 | |
|         * binary formats (e.g. UUencoded, MIMEd)
 | |
| 
 | |
| If you have other patterns or pattern generators that you think would be
 | |
| generally useful, please send them to the maintainer -- preferably as source
 | |
| code using the C<pattern> subroutine. Submissions that include a set of
 | |
| tests will be especially welcome.
 | |
| 
 | |
| 
 | |
| =head1 DIAGNOSTICS
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item C<Can't export unknown subroutine %s>
 | |
| 
 | |
| The subroutine-based interface didn't recognize the requested subroutine.
 | |
| Often caused by a spelling mistake or an incompletely specified name.
 | |
| 
 | |
| 
 | |
| =item C<Can't create unknown regex: $RE{...}>
 | |
| 
 | |
| Regexp::Common doesn't have a generator for the requested pattern.
 | |
| Often indicates a misspelt or missing parameter.
 | |
| 
 | |
| =item
 | |
| C<Perl %f does not support the pattern $RE{...}.
 | |
| You need Perl %f or later>
 | |
| 
 | |
| The requested pattern requires advanced regex features (e.g. recursion)
 | |
| that not available in your version of Perl. Time to upgrade.
 | |
| 
 | |
| =item C<< pattern() requires argument: name => [ @list ] >>
 | |
| 
 | |
| Every user-defined pattern specification must have a name.
 | |
| 
 | |
| =item C<< pattern() requires argument: create => $sub_ref_or_string >>
 | |
| 
 | |
| Every user-defined pattern specification must provide a pattern creation
 | |
| mechanism: either a pattern string or a reference to a subroutine that
 | |
| returns the pattern string.
 | |
| 
 | |
| =item C<Base must be between 1 and 36>
 | |
| 
 | |
| The C<< $RE{num}{real}{-base=>'I<N>'} >> pattern uses the characters [0-9A-Z]
 | |
| to represent the digits of various bases. Hence it only produces
 | |
| regular expressions for bases up to hexatricensimal.
 | |
| 
 | |
| =item C<Must specify delimiter in $RE{delimited}>
 | |
| 
 | |
| The pattern has no default delimiter.
 | |
| You need to write: C<< $RE{delimited}{-delim=>I<X>'} >> for some character I<X>
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 ACKNOWLEDGEMENTS
 | |
| 
 | |
| Deepest thanks to the many people who have encouraged and contributed to this
 | |
| project, especially: Elijah, Jarkko, Tom, Nat, Ed, and Vivek.
 | |
| 
 | |
| Further thanks go to: Alexandr Ciornii, Blair Zajac, Bob Stockdale,
 | |
| Charles Thomas, Chris Vertonghen, the CPAN Testers, David Hand,
 | |
| Fany, Geoffrey Leach, Hermann-Marcus Behrens, Jerome Quelin, Jim Cromie,
 | |
| Lars Wilke, Linda Julien, Mike Arms, Mike Castle, Mikko, Murat Uenalan,
 | |
| RafaE<235>l Garcia-Suarez, Ron Savage, Sam Vilain, Slaven Rezic, Smylers,
 | |
| Tim Maher, and all the others I've forgotten.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Damian Conway (damian@conway.org)
 | |
| 
 | |
| =head1 MAINTENANCE
 | |
| 
 | |
| This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | |
| 
 | |
| =head1 BUGS AND IRRITATIONS
 | |
| 
 | |
| Bound to be plenty.
 | |
| 
 | |
| For a start, there are many common regexes missing.
 | |
| Send them in to I<regexp-common@abigail.be>.
 | |
| 
 | |
| There are some POD issues when installing this module using a pre-5.6.0 perl;
 | |
| some manual pages may not install, or may not install correctly using a perl
 | |
| that is that old. You might consider upgrading your perl.
 | |
| 
 | |
| =head1 NOT A BUG
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item *
 | |
| 
 | |
| The various patterns are not anchored. That is, a pattern like
 | |
| C<< $RE {num} {int} >> will match against "abc4def", because a
 | |
| substring of the subject matches. This is by design, and not a
 | |
| bug. If you want the pattern to be anchored, use something like:
 | |
| 
 | |
|  my $integer = $RE {num} {int};
 | |
|  $subj =~ /^$integer$/ and print "Matches!\n";
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 LICENSE and COPYRIGHT
 | |
| 
 | |
| This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | |
| 
 | |
| This module is free software, and maybe used under any of the following
 | |
| licenses:
 | |
| 
 | |
|  1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | |
|  2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | |
|  3) The BSD License.               See the file COPYRIGHT.BSD.
 | |
|  4) The MIT License.               See the file COPYRIGHT.MIT.
 | |
| EOCommon
 | |
| # 2}}}
 | |
| $Regexp_Common_Contents{'Common/comment'} = <<'EOC';   # {{{2
 | |
| package Regexp::Common::comment;
 | |
| 
 | |
| use 5.10.0;
 | |
| 
 | |
| use strict;
 | |
| use warnings;
 | |
| no  warnings 'syntax';
 | |
| 
 | |
| use Regexp::Common qw /pattern clean no_defaults/;
 | |
| 
 | |
| our $VERSION = '2017060201';
 | |
| 
 | |
| my @generic = (
 | |
|     {languages => [qw /ABC Forth/],
 | |
|      to_eol    => ['\\\\']},   # This is for just a *single* backslash.
 | |
| 
 | |
|     {languages => [qw /Ada Alan Eiffel lua/],
 | |
|      to_eol    => ['--']},
 | |
| 
 | |
|     {languages => [qw /Advisor/],
 | |
|      to_eol    => ['#|//']},
 | |
| 
 | |
|     {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
 | |
|                        SMITH zonefile/],
 | |
|      to_eol    => [';']},
 | |
| 
 | |
|     {languages => ['Algol 60'],
 | |
|      from_to   => [[qw /comment ;/]]},
 | |
| 
 | |
|     {languages => [qw {ALPACA B C C-- LPC PL/I}],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw /awk fvwm2 Icon m4 mutt Perl Python QML
 | |
|                        R Ruby shell Tcl/],
 | |
|      to_eol    => ['#']},
 | |
| 
 | |
|     {languages => [[BASIC => 'mvEnterprise']],
 | |
|      to_eol    => ['[*!]|REM']},
 | |
| 
 | |
|     {languages => [qw /Befunge-98 Funge-98 Shelta/],
 | |
|      id        => [';']},
 | |
| 
 | |
|     {languages => ['beta-Juliet', 'Crystal Report', 'Portia', 'Ubercode'],
 | |
|      to_eol    => ['//']},
 | |
| 
 | |
|     {languages => ['BML'],
 | |
|      from_to   => [['<?_c', '_c?>']],
 | |
|     },
 | |
| 
 | |
|     {languages => [qw /C++/, 'C#', 'X++', qw /Cg ECMAScript FPL Java JavaScript/],
 | |
|      to_eol    => ['//'],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw /CLU LaTeX slrn TeX/],
 | |
|      to_eol    => ['%']},
 | |
| 
 | |
|     {languages => [qw /False/],
 | |
|      from_to   => [[qw !{ }!]]},
 | |
| 
 | |
|     {languages => [qw /Fortran/],
 | |
|      to_eol    => ['!']},
 | |
| 
 | |
|     {languages => [qw /Haifu/],
 | |
|      id        => [',']},
 | |
| 
 | |
|     {languages => [qw /ILLGOL/],
 | |
|      to_eol    => ['NB']},
 | |
| 
 | |
|     {languages => [qw /INTERCAL/],
 | |
|      to_eol    => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
 | |
| 
 | |
|     {languages => [qw /J/],
 | |
|      to_eol    => ['NB[.]']},
 | |
| 
 | |
|     {languages => [qw /JavaDoc/],
 | |
|      from_to   => [[qw {/** */}]]},
 | |
| 
 | |
|     {languages => [qw /Nickle/],
 | |
|      to_eol    => ['#'],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw /Oberon/],
 | |
|      from_to   => [[qw /(* *)/]]},
 | |
| 
 | |
|     {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
 | |
|      to_eol    => ['//'],
 | |
|      from_to   => [[qw !{ }!], [qw !(* *)!]]},
 | |
| 
 | |
|     {languages => [[qw /Pascal Workshop/]],
 | |
|      id        => [qw /"/],
 | |
|      from_to   => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
 | |
| 
 | |
|     {languages => [qw /PEARL/],
 | |
|      to_eol    => ['!'],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw /PHP/],
 | |
|      to_eol    => ['#', '//'],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw !PL/B!],
 | |
|      to_eol    => ['[.;]']},
 | |
| 
 | |
|     {languages => [qw !PL/SQL!],
 | |
|      to_eol    => ['--'],
 | |
|      from_to   => [[qw {/* */}]]},
 | |
| 
 | |
|     {languages => [qw /Q-BAL/],
 | |
|      to_eol    => ['`']},
 | |
| 
 | |
|     {languages => [qw /Smalltalk/],
 | |
|      id        => ['"']},
 | |
| 
 | |
|     {languages => [qw /SQL/],
 | |
|      to_eol    => ['-{2,}']},
 | |
| 
 | |
|     {languages => [qw /troff/],
 | |
|      to_eol    => ['\\\"']},
 | |
| 
 | |
|     {languages => [qw /vi/],
 | |
|      to_eol    => ['"']},
 | |
| 
 | |
|     {languages => [qw /*W/],
 | |
|      from_to   => [[qw {|| !!}]]},
 | |
| 
 | |
|     {languages => [qw /ZZT-OOP/],
 | |
|      to_eol    => ["'"]},
 | |
| );
 | |
| 
 | |
| my @plain_or_nested = (
 | |
|    [Caml         =>  undef,       "(*"  => "*)"],
 | |
|    [Dylan        =>  "//",        "/*"  => "*/"],
 | |
|    [Haskell      =>  "-{2,}",     "{-"  => "-}"],
 | |
|    [Hugo         =>  "!(?!\\\\)", "!\\" => "\\!"],
 | |
|    [SLIDE        =>  "#",         "(*"  => "*)"],
 | |
|   ['Modula-2'    =>  undef,       "(*"  => "*)"],
 | |
|   ['Modula-3'    =>  undef,       "(*"  => "*)"],
 | |
| );
 | |
| 
 | |
| #
 | |
| # Helper subs.
 | |
| #
 | |
| 
 | |
| sub combine      {
 | |
|     local $_ = join "|", @_;
 | |
|     if (@_ > 1) {
 | |
|         s/\(\?k:/(?:/g;
 | |
|         $_ = "(?k:$_)";
 | |
|     }
 | |
|     $_
 | |
| }
 | |
| 
 | |
| sub to_eol  ($)  {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
 | |
| sub id      ($)  {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"}  # One char only!
 | |
| sub from_to      {
 | |
|     my ($begin, $end) = @_;
 | |
| 
 | |
|     my $qb  = quotemeta $begin;
 | |
|     my $qe  = quotemeta $end;
 | |
|     my $fe  = quotemeta substr $end   => 0, 1;
 | |
|     my $te  = quotemeta substr $end   => 1;
 | |
| 
 | |
|     "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
 | |
| }
 | |
| 
 | |
| 
 | |
| my $count = 0;
 | |
| sub nested {
 | |
|     my ($begin, $end) = @_;
 | |
| 
 | |
|     $count ++;
 | |
|     my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
 | |
| 
 | |
|     my $qb  = quotemeta $begin;
 | |
|     my $qe  = quotemeta $end;
 | |
|     my $fb  = quotemeta substr $begin => 0, 1;
 | |
|     my $fe  = quotemeta substr $end   => 0, 1;
 | |
| 
 | |
|     my $tb  = quotemeta substr $begin => 1;
 | |
|     my $te  = quotemeta substr $end   => 1;
 | |
| 
 | |
|     use re 'eval';
 | |
| 
 | |
|     my $re;
 | |
|     if ($fb eq $fe) {
 | |
|         $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
 | |
|     }
 | |
|     else {
 | |
|         local $"      =  "|";
 | |
|         my   @clauses =  "(?>[^$fb$fe]+)";
 | |
|         push @clauses => "$fb(?!$tb)" if length $tb;
 | |
|         push @clauses => "$fe(?!$te)" if length $te;
 | |
|         push @clauses =>  $r;
 | |
|         $re           =   qr /(?:$qb(?:@clauses)*$qe)/;
 | |
|     }
 | |
| 
 | |
|     $Regexp::Common::comment [$count] = qr/$re/;
 | |
| }
 | |
| 
 | |
| #
 | |
| # Process data.
 | |
| #
 | |
| 
 | |
| foreach my $info (@plain_or_nested) {
 | |
|     my ($language, $mark, $begin, $end) = @$info;
 | |
|     pattern name    => [comment => $language],
 | |
|             create  =>
 | |
|                 sub {my $re     = nested $begin => $end;
 | |
|                      my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
 | |
|                      exists $_ [1] -> {-keep} ? qr /($prefix$re)/
 | |
|                                               : qr  /$prefix$re/
 | |
|                 },
 | |
|             ;
 | |
| }
 | |
| 
 | |
| 
 | |
| foreach my $group (@generic) {
 | |
|     my $pattern = combine +(map {to_eol   $_} @{$group -> {to_eol}}),
 | |
|                            (map {from_to @$_} @{$group -> {from_to}}),
 | |
|                            (map {id       $_} @{$group -> {id}}),
 | |
|                   ;
 | |
|     foreach my $language  (@{$group -> {languages}}) {
 | |
|         pattern name    => [comment => ref $language ? @$language : $language],
 | |
|                 create  => $pattern,
 | |
|                 ;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| #
 | |
| # Other languages.
 | |
| #
 | |
| 
 | |
| # http://www.pascal-central.com/docs/iso10206.txt
 | |
| pattern name    => [qw /comment Pascal/],
 | |
|         create  => '(?k:' . '(?k:[{]|[(][*])'
 | |
|                           . '(?k:[^}*]*(?:[*](?![)])[^}*]*)*)'
 | |
|                           . '(?k:[}]|[*][)])'
 | |
|                           . ')'
 | |
|         ;
 | |
| 
 | |
| # http://www.templetons.com/brad/alice/language/
 | |
| pattern name    =>  [qw /comment Pascal Alice/],
 | |
|         create  =>  '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
 | |
|         ;
 | |
| 
 | |
| 
 | |
| # http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
 | |
| pattern name    => [qw (comment), 'Algol 68'],
 | |
|         create  => q {(?k:(?:#[^#]*#)|}                           .
 | |
|                    q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
 | |
|                    q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
 | |
|         ;
 | |
| 
 | |
| 
 | |
| # See rules 91 and 92 of ISO 8879 (SGML).
 | |
| # Charles F. Goldfarb: "The SGML Handbook".
 | |
| # Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
 | |
| # Ch. 10.3, pp 390.
 | |
| pattern name    => [qw (comment HTML)],
 | |
|         create  => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
 | |
|         ;
 | |
| 
 | |
| 
 | |
| pattern name    => [qw /comment SQL MySQL/],
 | |
|         create  => q {(?k:(?:#|-- )[^\n]*\n|} .
 | |
|                    q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
 | |
|         ;
 | |
| 
 | |
| # Anything that isn't <>[]+-.,
 | |
| # http://home.wxs.nl/~faase009/Ha_BF.html
 | |
| pattern name    => [qw /comment Brainfuck/],
 | |
|         create  => '(?k:[^<>\[\]+\-.,]+)'
 | |
|         ;
 | |
| 
 | |
| # Squeak is a variant of Smalltalk-80.
 | |
| # http://www.squeak.
 | |
| # http://mucow.com/squeak-qref.html
 | |
| pattern name    => [qw /comment Squeak/],
 | |
|         create  => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
 | |
|         ;
 | |
| 
 | |
| #
 | |
| # Scores of less than 5 or above 17....
 | |
| # http://www.cliff.biffle.org/esoterica/beatnik.html
 | |
| @Regexp::Common::comment::scores = (1,  3,  3,  2,  1,  4,  2,  4,  1,  8,
 | |
|                                     5,  1,  3,  1,  1,  3, 10,  1,  1,  1,
 | |
|                                     1,  4,  4,  8,  4, 10);
 | |
| {
 | |
| my ($s, $x);
 | |
| pattern name    =>  [qw /comment Beatnik/],
 | |
|         create  =>  sub {
 | |
|             use re 'eval';
 | |
|             my $re = qr {\b([A-Za-z]+)\b
 | |
|                          (?(?{($s, $x) = (0, lc $^N);
 | |
|                               $s += $Regexp::Common::comment::scores
 | |
|                                     [ord (chop $x) - ord ('a')] while length $x;
 | |
|                               $s  >= 5 && $s < 18})XXX|)}x;
 | |
|             $re;
 | |
|         },
 | |
|         ;
 | |
| }
 | |
| 
 | |
| 
 | |
| # http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
 | |
| #  (Goto table of contents/3.3 Source Form)
 | |
| # Fortran, in fixed format. Comments start with a C, c or * in the first
 | |
| # column, or a ! anywhere, but the sixth column. Then end with a newline.
 | |
| pattern name    =>  [qw /comment Fortran fixed/],
 | |
|         create  =>  '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
 | |
|         ;
 | |
| 
 | |
| 
 | |
| # http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
 | |
| # Traditionally, comments in COBOL were indicated with an asterisk in
 | |
| # the seventh column. Modern compilers may be more lenient.
 | |
| pattern name    =>  [qw /comment COBOL/],
 | |
|         create  =>  '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
 | |
|         ;
 | |
| 
 | |
| 1;
 | |
| 
 | |
| 
 | |
| __END__
 | |
| 
 | |
| =pod
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| Regexp::Common::comment -- provide regexes for comments.
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
|     use Regexp::Common qw /comment/;
 | |
| 
 | |
|     while (<>) {
 | |
|         /$RE{comment}{C}/       and  print "Contains a C comment\n";
 | |
|         /$RE{comment}{C++}/     and  print "Contains a C++ comment\n";
 | |
|         /$RE{comment}{PHP}/     and  print "Contains a PHP comment\n";
 | |
|         /$RE{comment}{Java}/    and  print "Contains a Java comment\n";
 | |
|         /$RE{comment}{Perl}/    and  print "Contains a Perl comment\n";
 | |
|         /$RE{comment}{awk}/     and  print "Contains an awk comment\n";
 | |
|         /$RE{comment}{HTML}/    and  print "Contains an HTML comment\n";
 | |
|     }
 | |
| 
 | |
|     use Regexp::Common qw /comment RE_comment_HTML/;
 | |
| 
 | |
|     while (<>) {
 | |
|         $_ =~ RE_comment_HTML() and  print "Contains an HTML comment\n";
 | |
|     }
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| Please consult the manual of L<Regexp::Common> for a general description
 | |
| of the works of this interface.
 | |
| 
 | |
| Do not use this module directly, but load it via I<Regexp::Common>.
 | |
| 
 | |
| This modules gives you regular expressions for comments in various
 | |
| languages.
 | |
| 
 | |
| =head2 THE LANGUAGES
 | |
| 
 | |
| Below, the comments of each of the languages are described.
 | |
| The patterns are available as C<$RE{comment}{I<LANG>}>, foreach
 | |
| language I<LANG>. Some languages have variants; it's described
 | |
| at the individual languages how to get the patterns for the variants.
 | |
| Unless mentioned otherwise,
 | |
| C<{-keep}> sets C<$1>, C<$2>, C<$3> and C<$4> to the entire comment,
 | |
| the opening marker, the content of the comment, and the closing marker
 | |
| (for many languages, the latter is a newline) respectively.
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item ABC
 | |
| 
 | |
| Comments in I<ABC> start with a backslash (C<\>), and last till
 | |
| the end of the line.
 | |
| See L<http://homepages.cwi.nl/%7Esteven/abc/>.
 | |
| 
 | |
| =item Ada
 | |
| 
 | |
| Comments in I<Ada> start with C<-->, and last till the end of the line.
 | |
| 
 | |
| =item Advisor
 | |
| 
 | |
| I<Advisor> is a language used by the HP product I<glance>. Comments for
 | |
| this language start with either C<#> or C<//>, and last till the
 | |
| end of the line.
 | |
| 
 | |
| =item Advsys
 | |
| 
 | |
| Comments for the I<Advsys> language start with C<;> and last till
 | |
| the end of the line. See also L<http://www.wurb.com/if/devsys/12>.
 | |
| 
 | |
| =item Alan
 | |
| 
 | |
| I<Alan> comments start with C<-->, and last till the end of the line.
 | |
| See also L<http://w1.132.telia.com/~u13207378/alan/manual/alanTOC.html>.
 | |
| 
 | |
| =item Algol 60
 | |
| 
 | |
| Comments in the I<Algol 60> language start with the keyword C<comment>,
 | |
| and end with a C<;>. See L<http://www.masswerk.at/algol60/report.htm>.
 | |
| 
 | |
| =item Algol 68
 | |
| 
 | |
| In I<Algol 68>, comments are either delimited by C<#>, or by one of the
 | |
| keywords C<co> or C<comment>. The keywords should not be part of another
 | |
| word. See L<http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt>.
 | |
| With C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | |
| 
 | |
| =item ALPACA
 | |
| 
 | |
| The I<ALPACA> language has comments starting with C</*> and ending with C<*/>.
 | |
| 
 | |
| =item awk
 | |
| 
 | |
| The I<awk> programming language uses comments that start with C<#>
 | |
| and end at the end of the line.
 | |
| 
 | |
| =item B
 | |
| 
 | |
| The I<B> language has comments starting with C</*> and ending with C<*/>.
 | |
| 
 | |
| =item BASIC
 | |
| 
 | |
| There are various forms of BASIC around. Currently, we only support the
 | |
| variant supported by I<mvEnterprise>, whose pattern is available as
 | |
| C<$RE{comment}{BASIC}{mvEnterprise}>. Comments in this language start with a
 | |
| C<!>, a C<*> or the keyword C<REM>, and end till the end of the line. See
 | |
| L<http://www.rainingdata.com/products/beta/docs/mve/50/ReferenceManual/Basic.pdf>.
 | |
| 
 | |
| =item Beatnik
 | |
| 
 | |
| The esotoric language I<Beatnik> only uses words consisting of letters.
 | |
| Words are scored according to the rules of Scrabble. Words scoring less
 | |
| than 5 points, or 18 points or more are considered comments (although
 | |
| the compiler might mock at you if you score less than 5 points).
 | |
| Regardless whether C<{-keep}>, C<$1> will be set, and set to the
 | |
| entire comment. This pattern requires I<perl 5.8.0> or newer.
 | |
| 
 | |
| =item beta-Juliet
 | |
| 
 | |
| The I<beta-Juliet> programming language has comments that start with
 | |
| C<//> and that continue till the end of the line. See also
 | |
| L<http://www.catseye.mb.ca/esoteric/b-juliet/index.html>.
 | |
| 
 | |
| =item Befunge-98
 | |
| 
 | |
| The esotoric language I<Befunge-98> uses comments that start and end
 | |
| with a C<;>. See L<http://www.catseye.mb.ca/esoteric/befunge/98/spec98.html>.
 | |
| 
 | |
| =item BML
 | |
| 
 | |
| I<BML>, or I<Better Markup Language> is an HTML templating language that
 | |
| uses comments starting with C<< <?c_ >>, and ending with C<< c_?> >>.
 | |
| See L<http://www.livejournal.com/doc/server/bml.index.html>.
 | |
| 
 | |
| =item Brainfuck
 | |
| 
 | |
| The minimal language I<Brainfuck> uses only eight characters,
 | |
| C<E<lt>>, C<E<gt>>, C<[>, C<]>, C<+>, C<->, C<.> and C<,>.
 | |
| Any other characters are considered comments. With C<{-keep}>,
 | |
| C<$1> is set to the entire comment.
 | |
| 
 | |
| =item C
 | |
| 
 | |
| The I<C> language has comments starting with C</*> and ending with C<*/>.
 | |
| 
 | |
| =item C--
 | |
| 
 | |
| The I<C--> language has comments starting with C</*> and ending with C<*/>.
 | |
| See L<http://cs.uas.arizona.edu/classes/453/programs/C--Spec.html>.
 | |
| 
 | |
| =item C++
 | |
| 
 | |
| The I<C++> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment.
 | |
| 
 | |
| =item C#
 | |
| 
 | |
| The I<C#> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment.
 | |
| See L<http://msdn.microsoft.com/library/default.asp?url=/library/en-us/csspec/html/vclrfcsharpspec_C.asp>.
 | |
| 
 | |
| =item Caml
 | |
| 
 | |
| Comments in I<Caml> start with C<(*>, end with C<*)>, and can be nested.
 | |
| See L<http://www.cs.caltech.edu/courses/cs134/cs134b/book.pdf> and
 | |
| L<http://pauillac.inria.fr/caml/index-eng.html>.
 | |
| 
 | |
| =item Cg
 | |
| 
 | |
| The I<Cg> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment.
 | |
| See L<http://developer.nvidia.com/attach/3722>.
 | |
| 
 | |
| =item CLU
 | |
| 
 | |
| In C<CLU>, a comment starts with a procent sign (C<%>), and ends with the
 | |
| next newline. See L<ftp://ftp.lcs.mit.edu:/pub/pclu/CLU-syntax.ps> and
 | |
| L<http://www.pmg.lcs.mit.edu/CLU.html>.
 | |
| 
 | |
| =item COBOL
 | |
| 
 | |
| Traditionally, comments in I<COBOL> are indicated by an asterisk in the
 | |
| seventh column. This is what the pattern matches. Modern compiler may
 | |
| more lenient though. See L<http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm>,
 | |
| and L<http://www.csis.ul.ie/cobol/default.htm>.
 | |
| 
 | |
| =item CQL
 | |
| 
 | |
| Comments in the chess query language (I<CQL>) start with a semi colon
 | |
| (C<;>) and last till the end of the line. See L<http://www.rbnn.com/cql/>.
 | |
| 
 | |
| =item Crystal Report
 | |
| 
 | |
| The formula editor in I<Crystal Reports> uses comments that start
 | |
| with C<//>, and end with the end of the line.
 | |
| 
 | |
| =item Dylan
 | |
| 
 | |
| There are two types of comments in I<Dylan>. They either start with
 | |
| C<//>, or are nested comments, delimited with C</*> and C<*/>.
 | |
| Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | |
| This pattern requires I<perl 5.6.0> or newer.
 | |
| 
 | |
| =item ECMAScript
 | |
| 
 | |
| The I<ECMAScript> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment. I<JavaScript> is Netscapes implementation
 | |
| of I<ECMAScript>. See
 | |
| L<http://www.ecma-international.org/publications/files/ecma-st/Ecma-262.pdf>,
 | |
| and L<http://www.ecma-international.org/publications/standards/Ecma-262.htm>.
 | |
| 
 | |
| =item Eiffel
 | |
| 
 | |
| I<Eiffel> comments start with C<-->, and last till the end of the line.
 | |
| 
 | |
| =item False
 | |
| 
 | |
| In I<False>, comments start with C<{> and end with C<}>.
 | |
| See L<http://wouter.fov120.com/false/false.txt>
 | |
| 
 | |
| =item FPL
 | |
| 
 | |
| The I<FPL> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment.
 | |
| 
 | |
| =item Forth
 | |
| 
 | |
| Comments in Forth start with C<\>, and end with the end of the line.
 | |
| See also L<http://docs.sun.com/sb/doc/806-1377-10>.
 | |
| 
 | |
| =item Fortran
 | |
| 
 | |
| There are two forms of I<Fortran>. There's free form I<Fortran>, which
 | |
| has comments that start with C<!>, and end at the end of the line.
 | |
| The pattern for this is given by C<$RE{Fortran}>. Fixed form I<Fortran>,
 | |
| which has been obsoleted, has comments that start with C<C>, C<c> or
 | |
| C<*> in the first column, or with C<!> anywhere, but the sixth column.
 | |
| The pattern for this are given by C<$RE{Fortran}{fixed}>.
 | |
| 
 | |
| See also L<http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/>.
 | |
| 
 | |
| =item Funge-98
 | |
| 
 | |
| The esotoric language I<Funge-98> uses comments that start and end with
 | |
| a C<;>.
 | |
| 
 | |
| =item fvwm2
 | |
| 
 | |
| Configuration files for I<fvwm2> have comments starting with a
 | |
| C<#> and lasting the rest of the line.
 | |
| 
 | |
| =item Haifu
 | |
| 
 | |
| I<Haifu>, an esotoric language using haikus, has comments starting and
 | |
| ending with a C<,>.
 | |
| See L<http://www.dangermouse.net/esoteric/haifu.html>.
 | |
| 
 | |
| =item Haskell
 | |
| 
 | |
| There are two types of comments in I<Haskell>. They either start with
 | |
| at least two dashes, or are nested comments, delimited with C<{-> and C<-}>.
 | |
| Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | |
| This pattern requires I<perl 5.6.0> or newer.
 | |
| 
 | |
| =item HTML
 | |
| 
 | |
| In I<HTML>, comments only appear inside a I<comment declaration>.
 | |
| A comment declaration starts with a C<E<lt>!>, and ends with a
 | |
| C<E<gt>>. Inside this declaration, we have zero or more comments.
 | |
| Comments starts with C<--> and end with C<-->, and are optionally
 | |
| followed by whitespace. The pattern C<$RE{comment}{HTML}> recognizes
 | |
| those comment declarations (and hence more than a comment).
 | |
| Note that this is not the same as something that starts with
 | |
| C<E<lt>!--> and ends with C<--E<gt>>, because the following will
 | |
| be matched completely:
 | |
| 
 | |
|     <!--  First  Comment   --
 | |
|       --> Second Comment <!--
 | |
|       --  Third  Comment   -->
 | |
| 
 | |
| Do not be fooled by what your favourite browser thinks is an HTML
 | |
| comment.
 | |
| 
 | |
| If C<{-keep}> is used, the following are returned:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item $1
 | |
| 
 | |
| captures the entire comment declaration.
 | |
| 
 | |
| =item $2
 | |
| 
 | |
| captures the MDO (markup declaration open), C<E<lt>!>.
 | |
| 
 | |
| =item $3
 | |
| 
 | |
| captures the content between the MDO and the MDC.
 | |
| 
 | |
| =item $4
 | |
| 
 | |
| captures the (last) comment, without the surrounding dashes.
 | |
| 
 | |
| =item $5
 | |
| 
 | |
| captures the MDC (markup declaration close), C<E<gt>>.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =item Hugo
 | |
| 
 | |
| There are two types of comments in I<Hugo>. They either start with
 | |
| C<!> (which cannot be followed by a C<\>), or are nested comments,
 | |
| delimited with C<!\> and C<\!>.
 | |
| Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
 | |
| This pattern requires I<perl 5.6.0> or newer.
 | |
| 
 | |
| =item Icon
 | |
| 
 | |
| I<Icon> has comments that start with C<#> and end at the next new line.
 | |
| See L<http://www.toolsofcomputing.com/IconHandbook/IconHandbook.pdf>,
 | |
| L<http://www.cs.arizona.edu/icon/index.htm>, and
 | |
| L<http://burks.bton.ac.uk/burks/language/icon/index.htm>.
 | |
| 
 | |
| =item ILLGOL
 | |
| 
 | |
| The esotoric language I<ILLGOL> uses comments starting with I<NB> and lasting
 | |
| till the end of the line.
 | |
| See L<http://www.catseye.mb.ca/esoteric/illgol/index.html>.
 | |
| 
 | |
| =item INTERCAL
 | |
| 
 | |
| Comments in INTERCAL are single line comments. They start with one of
 | |
| the keywords C<NOT> or C<N'T>, and can optionally be preceded by the
 | |
| keywords C<DO> and C<PLEASE>. If both keywords are used, C<PLEASE>
 | |
| precedes C<DO>. Keywords are separated by whitespace.
 | |
| 
 | |
| =item J
 | |
| 
 | |
| The language I<J> uses comments that start with C<NB.>, and that last till
 | |
| the end of the line. See
 | |
| L<http://www.jsoftware.com/books/help/primer/contents.htm>, and
 | |
| L<http://www.jsoftware.com/>.
 | |
| 
 | |
| =item Java
 | |
| 
 | |
| The I<Java> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment.
 | |
| 
 | |
| =item JavaDoc
 | |
| 
 | |
| The I<Javadoc> documentation syntax is demarked with a subset of
 | |
| ordinary Java comments to separate it from code.  Comments start with
 | |
| C</**> end with C<*/>.  If C<{-keep}> is used, only C<$1> will be set,
 | |
| and set to the entire comment. See
 | |
| L<http://www.oracle.com/technetwork/java/javase/documentation/index-137868.html#format>.
 | |
| 
 | |
| =item JavaScript
 | |
| 
 | |
| The I<JavaScript> language has two forms of comments. Comments that start with
 | |
| C<//> and last till the end of the line, and comments that start with
 | |
| C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
 | |
| set, and set to the entire comment. I<JavaScript> is Netscapes implementation
 | |
| of I<ECMAScript>.
 | |
| See L<http://www.mozilla.org/js/language/E262-3.pdf>,
 | |
| and L<http://www.mozilla.org/js/language/>.
 | |
| 
 | |
| =item LaTeX
 | |
| 
 | |
| The documentation language I<LaTeX> uses comments starting with C<%>
 | |
| and ending at the end of the line.
 | |
| 
 | |
| =item Lisp
 | |
| 
 | |
| Comments in I<Lisp> start with a semi-colon (C<;>) and last till the
 | |
| end of the line.
 | |
| 
 | |
| =item LPC
 | |
| 
 | |
| The I<LPC> language has comments starting with C</*> and ending with C<*/>.
 | |
| 
 | |
| =item LOGO
 | |
| 
 | |
| Comments for the language I<LOGO> start with C<;>, and last till the end
 | |
| of the line.
 | |
| 
 | |
| =item lua
 | |
| 
 | |
| Comments for the I<lua> language start with C<-->, and last till the end
 | |
| of the line. See also L<http://www.lua.org/manual/manual.html>.
 | |
| 
 | |
| =item M, MUMPS
 | |
| 
 | |
| In C<M> (aka C<MUMPS>), comments start with a semi-colon, and last
 | |
| till the end of a line. The language specification requires the
 | |
| semi-colon to be preceded by one or more I<linestart character>s.
 | |
| Those characters default to a space, but that's configurable. This
 | |
| requirement, of preceding the comment with linestart characters is
 | |
| B<not> tested for. See
 | |
| L<ftp://ftp.intersys.com/pub/openm/ism/ism64docs.zip>,
 | |
| L<http://mtechnology.intersys.com/mproducts/openm/index.html>, and
 | |
| L<http://mcenter.com/mtrc/index.html>.
 | |
| 
 | |
| =item m4
 | |
| 
 | |
| By default, the preprocessor language I<m4> uses single line comments,
 | |
| that start with a C<#> and continue to the end of the line, including
 | |
| the newline. The pattern C<$RE {comment} {m4}> matches such comments.
 | |
| In I<m4>, it is possible to change the starting token though.
 | |
| See L<http://wolfram.schneider.org/bsd/7thEdManVol2/m4/m4.pdf>,
 | |
| L<http://www.cs.stir.ac.uk/~kjt/research/pdf/expl-m4.pdf>, and
 | |
| L<http://www.gnu.org/software/m4/manual/>.
 | |
| 
 | |
| =item Modula-2
 | |
| 
 | |
| In C<Modula-2>, comments start with C<(*>, and end with C<*)>. Comments
 | |
| may be nested. See L<http://www.modula2.org/>.
 | |
| 
 | |
| =item Modula-3
 | |
| 
 | |
| In C<Modula-3>, comments start with C<(*>, and end with C<*)>. Comments
 | |
| may be nested. See L<http://www.m3.org/>.
 | |
| 
 | |
| =item mutt
 | |
| 
 | |
| Configuration files for I<mutt> have comments starting with a
 | |
| C<#> and lasting the rest of the line.
 | |
| 
 | |
| =item Nickle
 | |
| 
 | |
| The I<Nickle> language has one line comments starting with C<#>
 | |
| (like Perl), or multiline comments delimited by C</*> and C<*/>
 | |
| (like C). Under C<-keep>, only C<$1> will be set. See also
 | |
| L<http://www.nickle.org>.
 | |
| 
 | |
| =item Oberon
 | |
| 
 | |
| Comments in I<Oberon> start with C<(*> and end with C<*)>.
 | |
| See L<http://www.oberon.ethz.ch/oreport.html>.
 | |
| 
 | |
| =item Pascal
 | |
| 
 | |
| There are many implementations of Pascal. This modules provides
 | |
| pattern for comments of several implementations.
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item C<$RE{comment}{Pascal}>
 | |
| 
 | |
| This is the pattern that recognizes comments according to the Pascal ISO
 | |
| standard. This standard says that comments start with either C<{>, or
 | |
| C<(*>, and end with C<}> or C<*)>. This means that C<{*)> and C<(*}>
 | |
| are considered to be comments. Many Pascal applications don't allow this.
 | |
| See L<http://www.pascal-central.com/docs/iso10206.txt>
 | |
| 
 | |
| =item C<$RE{comment}{Pascal}{Alice}>
 | |
| 
 | |
| The I<Alice Pascal> compiler accepts comments that start with C<{>
 | |
| and end with C<}>. Comments are not allowed to contain newlines.
 | |
| See L<http://www.templetons.com/brad/alice/language/>.
 | |
| 
 | |
| =item C<$RE{comment}{Pascal}{Delphi}>, C<$RE{comment}{Pascal}{Free}>
 | |
| and C<$RE{comment}{Pascal}{GPC}>
 | |
| 
 | |
| The I<Delphi Pascal>, I<Free Pascal> and the I<Gnu Pascal Compiler>
 | |
| implementations of Pascal all have comments that either start with
 | |
| C<//> and last till the end of the line, are delimited with C<{>
 | |
| and C<}> or are delimited with C<(*> and C<*)>. Patterns for those
 | |
| comments are given by C<$RE{comment}{Pascal}{Delphi}>,
 | |
| C<$RE{comment}{Pascal}{Free}> and C<$RE{comment}{Pascal}{GPC}>
 | |
| respectively. These patterns only set C<$1> when C<{-keep}> is used,
 | |
| which will then include the entire comment.
 | |
| 
 | |
| See L<http://info.borland.com/techpubs/delphi5/oplg/>,
 | |
| L<http://www.freepascal.org/docs-html/ref/ref.html> and
 | |
| L<http://www.gnu-pascal.de/gpc/>.
 | |
| 
 | |
| =item C<$RE{comment}{Pascal}{Workshop}>
 | |
| 
 | |
| The I<Workshop Pascal> compiler, from SUN Microsystems, allows comments
 | |
| that are delimited with either C<{> and C<}>, delimited with
 | |
| C<(*)> and C<*>), delimited with C</*>, and C<*/>, or starting
 | |
| and ending with a double quote (C<">). When C<{-keep}> is used,
 | |
| only C<$1> is set, and returns the entire comment.
 | |
| 
 | |
| See L<http://docs.sun.com/db/doc/802-5762>.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =item PEARL
 | |
| 
 | |
| Comments in I<PEARL> start with a C<!> and last till the end of the
 | |
| line, or start with C</*> and end with C<*/>. With C<{-keep}>,
 | |
| C<$1> will be set to the entire comment.
 | |
| 
 | |
| =item PHP
 | |
| 
 | |
| Comments in I<PHP> start with either C<#> or C<//> and last till the
 | |
| end of the line, or are delimited by C</*> and C<*/>. With C<{-keep}>,
 | |
| C<$1> will be set to the entire comment.
 | |
| 
 | |
| =item PL/B
 | |
| 
 | |
| In I<PL/B>, comments start with either C<.> or C<;>, and end with the
 | |
| next newline. See L<http://www.mmcctech.com/pl-b/plb-0010.htm>.
 | |
| 
 | |
| =item PL/I
 | |
| 
 | |
| The I<PL/I> language has comments starting with C</*> and ending with C<*/>.
 | |
| 
 | |
| =item PL/SQL
 | |
| 
 | |
| In I<PL/SQL>, comments either start with C<--> and run till the end
 | |
| of the line, or start with C</*> and end with C<*/>.
 | |
| 
 | |
| =item Perl
 | |
| 
 | |
| I<Perl> uses comments that start with a C<#>, and continue till the end
 | |
| of the line.
 | |
| 
 | |
| =item Portia
 | |
| 
 | |
| The I<Portia> programming language has comments that start with C<//>,
 | |
| and last till the end of the line.
 | |
| 
 | |
| =item Python
 | |
| 
 | |
| I<Python> uses comments that start with a C<#>, and continue till the end
 | |
| of the line.
 | |
| 
 | |
| =item Q-BAL
 | |
| 
 | |
| Comments in the I<Q-BAL> language start with C<`> (a backtick), and
 | |
| continue till the end of the line.
 | |
| 
 | |
| =item QML
 | |
| 
 | |
| In C<QML>, comments start with C<#> and last till the end of the line.
 | |
| See L<http://www.questionmark.com/uk/qml/overview.doc>.
 | |
| 
 | |
| =item R
 | |
| 
 | |
| The statistical language I<R> uses comments that start with a C<#> and
 | |
| end with the following new line. See L<http://www.r-project.org/>.
 | |
| 
 | |
| =item REBOL
 | |
| 
 | |
| Comments for the I<REBOL> language start with C<;> and last till the
 | |
| end of the line.
 | |
| 
 | |
| =item Ruby
 | |
| 
 | |
| Comments in I<Ruby> start with C<#> and last till the end of the time.
 | |
| 
 | |
| =item Scheme
 | |
| 
 | |
| I<Scheme> comments start with C<;>, and last till the end of the line.
 | |
| See L<http://schemers.org/>.
 | |
| 
 | |
| =item shell
 | |
| 
 | |
| Comments in various I<shell>s start with a C<#> and end at the end of
 | |
| the line.
 | |
| 
 | |
| =item Shelta
 | |
| 
 | |
| The esotoric language I<Shelta> uses comments that start and end with
 | |
| a C<;>. See L<http://www.catseye.mb.ca/esoteric/shelta/index.html>.
 | |
| 
 | |
| =item SLIDE
 | |
| 
 | |
| The I<SLIDE> language has two forms of comments. First there is the
 | |
| line comment, which starts with a C<#> and includes the rest of the
 | |
| line (just like Perl). Second, there is the multiline, nested comment,
 | |
| which are delimited by C<(*> and C<*)>. Under C{-keep}>, only
 | |
| C<$1> is set, and is set to the entire comment. See
 | |
| L<http://www.cs.berkeley.edu/~ug/slide/docs/slide/spec/spec_frame_intro.shtml>.
 | |
| 
 | |
| =item slrn
 | |
| 
 | |
| Configuration files for I<slrn> have comments starting with a
 | |
| C<%> and lasting the rest of the line.
 | |
| 
 | |
| =item Smalltalk
 | |
| 
 | |
| I<Smalltalk> uses comments that start and end with a double quote, C<">.
 | |
| 
 | |
| =item SMITH
 | |
| 
 | |
| Comments in the I<SMITH> language start with C<;>, and last till the
 | |
| end of the line.
 | |
| 
 | |
| =item Squeak
 | |
| 
 | |
| In the Smalltalk variant I<Squeak>, comments start and end with
 | |
| C<">. Double quotes can appear inside comments by doubling them.
 | |
| 
 | |
| =item SQL
 | |
| 
 | |
| Standard I<SQL> uses comments starting with two or more dashes, and
 | |
| ending at the end of the line.
 | |
| 
 | |
| I<MySQL> does not follow the standard. Instead, it allows comments
 | |
| that start with a C<#> or C<-- > (that's two dashes and a space)
 | |
| ending with the following newline, and comments starting with
 | |
| C</*>, and ending with the next C<;> or C<*/> that isn't inside
 | |
| single or double quotes. A pattern for this is returned by
 | |
| C<$RE{comment}{SQL}{MySQL}>. With C<{-keep}>, only C<$1> will
 | |
| be set, and it returns the entire comment.
 | |
| 
 | |
| =item Tcl
 | |
| 
 | |
| In I<Tcl>, comments start with C<#> and continue till the end of the line.
 | |
| 
 | |
| =item TeX
 | |
| 
 | |
| The documentation language I<TeX> uses comments starting with C<%>
 | |
| and ending at the end of the line.
 | |
| 
 | |
| =item troff
 | |
| 
 | |
| The document formatting language I<troff> uses comments starting
 | |
| with C<\">, and continuing till the end of the line.
 | |
| 
 | |
| =item Ubercode
 | |
| 
 | |
| The Windows programming language I<Ubercode> uses comments that start with
 | |
| C<//> and continue to the end of the line. See L<http://www.ubercode.com>.
 | |
| 
 | |
| =item vi
 | |
| 
 | |
| In configuration files for the editor I<vi>, one can use comments
 | |
| starting with C<">, and ending at the end of the line.
 | |
| 
 | |
| =item *W
 | |
| 
 | |
| In the language I<*W>, comments start with C<||>, and end with C<!!>.
 | |
| 
 | |
| =item zonefile
 | |
| 
 | |
| Comments in DNS I<zonefile>s start with C<;>, and continue till the
 | |
| end of the line.
 | |
| 
 | |
| =item ZZT-OOP
 | |
| 
 | |
| The in-game language I<ZZT-OOP> uses comments that start with a C<'>
 | |
| character, and end at the following newline. See
 | |
| L<http://dave2.rocketjump.org/rad/zzthelp/lang.html>.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 REFERENCES
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item B<[Go 90]>
 | |
| 
 | |
| Charles F. Goldfarb: I<The SGML Handbook>. Oxford: Oxford University
 | |
| Press. B<1990>. ISBN 0-19-853737-9. Ch. 10.3, pp 390-391.
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 SEE ALSO
 | |
| 
 | |
| L<Regexp::Common> for a general description of how to use this interface.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Damian Conway (damian@conway.org)
 | |
| 
 | |
| =head1 MAINTENANCE
 | |
| 
 | |
| This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | |
| 
 | |
| =head1 BUGS AND IRRITATIONS
 | |
| 
 | |
| Bound to be plenty.
 | |
| 
 | |
| For a start, there are many common regexes missing.
 | |
| Send them in to I<regexp-common@abigail.be>.
 | |
| 
 | |
| =head1 LICENSE and COPYRIGHT
 | |
| 
 | |
| This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | |
| 
 | |
| This module is free software, and maybe used under any of the following
 | |
| licenses:
 | |
| 
 | |
|  1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | |
|  2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | |
|  3) The BSD License.               See the file COPYRIGHT.BSD.
 | |
|  4) The MIT License.               See the file COPYRIGHT.MIT.
 | |
| 
 | |
| =cut
 | |
| EOC
 | |
| # 2}}}
 | |
| $Regexp_Common_Contents{'Common/balanced'} = <<'EOB';   # {{{2
 | |
| package Regexp::Common::balanced; {
 | |
| 
 | |
| use 5.10.0;
 | |
| 
 | |
| use strict;
 | |
| use warnings;
 | |
| no  warnings 'syntax';
 | |
| 
 | |
| use Regexp::Common qw /pattern clean no_defaults/;
 | |
| 
 | |
| our $VERSION = '2017060201';
 | |
| 
 | |
| my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
 | |
| my %cache;
 | |
| 
 | |
| sub nested {
 | |
|     my ($start, $finish) = @_;
 | |
| 
 | |
|     return $cache {$start} {$finish} if exists $cache {$start} {$finish};
 | |
| 
 | |
|     my @starts   = map {s/\\(.)/$1/g; $_} grep {length}
 | |
|                         $start  =~ /([^|\\]+|\\.)+/gs;
 | |
|     my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
 | |
|                         $finish =~ /([^|\\]+|\\.)+/gs;
 | |
| 
 | |
|     push @finishes => ($finishes [-1]) x (@starts - @finishes);
 | |
| 
 | |
|     my @re;
 | |
|     local $" = "|";
 | |
|     foreach my $begin (@starts) {
 | |
|         my $end = shift @finishes;
 | |
| 
 | |
|         my $qb  = quotemeta $begin;
 | |
|         my $qe  = quotemeta $end;
 | |
|         my $fb  = quotemeta substr $begin => 0, 1;
 | |
|         my $fe  = quotemeta substr $end   => 0, 1;
 | |
| 
 | |
|         my $tb  = quotemeta substr $begin => 1;
 | |
|         my $te  = quotemeta substr $end   => 1;
 | |
| 
 | |
|         my $add;
 | |
|         if ($fb eq $fe) {
 | |
|             push @re =>
 | |
|                    qq /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|(?-1))*$qe)/;
 | |
|         }
 | |
|         else {
 | |
|             my   @clauses =  "(?>[^$fb$fe]+)";
 | |
|             push @clauses => "$fb(?!$tb)" if length $tb;
 | |
|             push @clauses => "$fe(?!$te)" if length $te;
 | |
|             push @clauses => "(?-1)";
 | |
|             push @re      =>  qq /(?:$qb(?:@clauses)*$qe)/;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     $cache {$start} {$finish} = qr /(@re)/;
 | |
| }
 | |
| 
 | |
| 
 | |
| pattern name    => [qw /balanced -parens=() -begin= -end=/],
 | |
|         create  => sub {
 | |
|             my $flag = $_[1];
 | |
|             unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
 | |
|                     defined $flag -> {-end}   && length $flag -> {-end}) {
 | |
|                 my @open  = grep {index ($flag->{-parens}, $_) >= 0}
 | |
|                              ('[','(','{','<');
 | |
|                 my @close = map {$closer {$_}} @open;
 | |
|                 $flag -> {-begin} = join "|" => @open;
 | |
|                 $flag -> {-end}   = join "|" => @close;
 | |
|             }
 | |
|             return nested @$flag {qw /-begin -end/};
 | |
|         },
 | |
|         ;
 | |
| 
 | |
| }
 | |
| 
 | |
| 1;
 | |
| 
 | |
| __END__
 | |
| 
 | |
| =pod
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| Regexp::Common::balanced -- provide regexes for strings with balanced
 | |
| parenthesized delimiters or arbitrary delimiters.
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
|     use Regexp::Common qw /balanced/;
 | |
| 
 | |
|     while (<>) {
 | |
|         /$RE{balanced}{-parens=>'()'}/
 | |
|                                    and print q{balanced parentheses\n};
 | |
|     }
 | |
| 
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| Please consult the manual of L<Regexp::Common> for a general description
 | |
| of the works of this interface.
 | |
| 
 | |
| Do not use this module directly, but load it via I<Regexp::Common>.
 | |
| 
 | |
| =head2 C<$RE{balanced}{-parens}>
 | |
| 
 | |
| Returns a pattern that matches a string that starts with the nominated
 | |
| opening parenthesis or bracket, contains characters and properly nested
 | |
| parenthesized subsequences, and ends in the matching parenthesis.
 | |
| 
 | |
| More than one type of parenthesis can be specified:
 | |
| 
 | |
|         $RE{balanced}{-parens=>'(){}'}
 | |
| 
 | |
| in which case all specified parenthesis types must be correctly balanced within
 | |
| the string.
 | |
| 
 | |
| Since version 2013030901, C<< $1 >> will always be set (to the entire
 | |
| matched substring), regardless whether C<< {-keep} >> is used or not.
 | |
| 
 | |
| =head2 C<< $RE{balanced}{-begin => "begin"}{-end => "end"} >>
 | |
| 
 | |
| Returns a pattern that matches a string that is properly balanced
 | |
| using the I<begin> and I<end> strings as start and end delimiters.
 | |
| Multiple sets of begin and end strings can be given by separating
 | |
| them by C<|>s (which can be escaped with a backslash).
 | |
| 
 | |
|     qr/$RE{balanced}{-begin => "do|if|case"}{-end => "done|fi|esac"}/
 | |
| 
 | |
| will match properly balanced strings that either start with I<do> and
 | |
| end with I<done>, start with I<if> and end with I<fi>, or start with
 | |
| I<case> and end with I<esac>.
 | |
| 
 | |
| If I<-end> contains less cases than I<-begin>, the last case of I<-end>
 | |
| is repeated. If it contains more cases than I<-begin>, the extra cases
 | |
| are ignored. If either of I<-begin> or I<-end> isn't given, or is empty,
 | |
| I<< -begin => '(' >> and I<< -end => ')' >> are assumed.
 | |
| 
 | |
| Since version 2013030901, C<< $1 >> will always be set (to the entire
 | |
| matched substring), regardless whether C<< {-keep} >> is used or not.
 | |
| 
 | |
| =head2 Note
 | |
| 
 | |
| Since version 2013030901 the pattern will make of the recursive construct
 | |
| C<< (?-1) >>, instead of using the problematic C<< (??{ }) >> construct.
 | |
| This fixes an problem that was introduced in the 5.17 development track.
 | |
| 
 | |
| =head1 SEE ALSO
 | |
| 
 | |
| L<Regexp::Common> for a general description of how to use this interface.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Damian Conway (damian@conway.org)
 | |
| 
 | |
| =head1 MAINTENANCE
 | |
| 
 | |
| This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | |
| 
 | |
| =head1 BUGS AND IRRITATIONS
 | |
| 
 | |
| Bound to be plenty.
 | |
| 
 | |
| For a start, there are many common regexes missing.
 | |
| Send them in to I<regexp-common@abigail.be>.
 | |
| 
 | |
| =head1 LICENSE and COPYRIGHT
 | |
| 
 | |
| This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | |
| 
 | |
| This module is free software, and maybe used under any of the following
 | |
| licenses:
 | |
| 
 | |
|  1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | |
|  2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | |
|  3) The BSD License.               See the file COPYRIGHT.BSD.
 | |
|  4) The MIT License.               See the file COPYRIGHT.MIT.
 | |
| 
 | |
| =cut
 | |
| EOB
 | |
| # 2}}}
 | |
| $Regexp_Common_Contents{'Common/delimited'} = <<'EOD';   # {{{2
 | |
| package Regexp::Common::delimited;
 | |
| 
 | |
| use 5.10.0;
 | |
| 
 | |
| use strict;
 | |
| use warnings;
 | |
| no  warnings 'syntax';
 | |
| 
 | |
| use Regexp::Common qw /pattern clean no_defaults/;
 | |
| 
 | |
| use charnames ':full';
 | |
| 
 | |
| our $VERSION = '2017060201';
 | |
| 
 | |
| sub gen_delimited {
 | |
| 
 | |
|     my ($dels, $escs, $cdels) = @_;
 | |
|     # return '(?:\S*)' unless $dels =~ /\S/;
 | |
|     if (defined $escs && length $escs) {
 | |
|         $escs  .= substr  ($escs, -1) x (length ($dels) - length  ($escs));
 | |
|     }
 | |
|     if (defined $cdels && length $cdels) {
 | |
|         $cdels .= substr ($cdels, -1) x (length ($dels) - length ($cdels));
 | |
|     }
 | |
|     else {
 | |
|         $cdels = $dels;
 | |
|     }
 | |
| 
 | |
|     my @pat = ();
 | |
|     for (my $i = 0; $i < length $dels; $i ++) {
 | |
|         my $del  = quotemeta substr  ($dels, $i, 1);
 | |
|         my $cdel = quotemeta substr ($cdels, $i, 1);
 | |
|         my $esc  = defined $escs && length ($escs)
 | |
|                            ? quotemeta substr ($escs, $i, 1) : "";
 | |
|         if ($cdel eq $esc) {
 | |
|             push @pat =>
 | |
|                 "(?k:$del)(?k:[^$cdel]*(?:(?:$cdel$cdel)[^$cdel]*)*)(?k:$cdel)";
 | |
|         }
 | |
|         elsif (length $esc) {
 | |
|             push @pat =>
 | |
|                 "(?k:$del)(?k:[^$esc$cdel]*(?:$esc.[^$esc$cdel]*)*)(?k:$cdel)";
 | |
|         }
 | |
|         else {
 | |
|             push @pat => "(?k:$del)(?k:[^$cdel]*)(?k:$cdel)";
 | |
|         }
 | |
|     }
 | |
|     my $pat = join '|', @pat;
 | |
|     return "(?k:(?|$pat))";
 | |
| }
 | |
| 
 | |
| sub _croak {
 | |
|     require Carp;
 | |
|     goto &Carp::croak;
 | |
| }
 | |
| 
 | |
| pattern name    => [qw( delimited -delim= -esc=\\ -cdelim= )],
 | |
|         create  => sub {my $flags = $_[1];
 | |
|                         _croak 'Must specify delimiter in $RE{delimited}'
 | |
|                               unless length $flags->{-delim};
 | |
|                         return gen_delimited (@{$flags}{-delim, -esc, -cdelim});
 | |
|                    },
 | |
|         ;
 | |
| 
 | |
| pattern name    => [qw( quoted -esc=\\ )],
 | |
|         create  => sub {my $flags = $_[1];
 | |
|                         return gen_delimited (q{"'`}, $flags -> {-esc});
 | |
|                    },
 | |
|         ;
 | |
| 
 | |
| 
 | |
| my @bracket_pairs;
 | |
| if ($] >= 5.014) {
 | |
|     #
 | |
|     # List from http://xahlee.info/comp/unicode_matching_brackets.html
 | |
|     #
 | |
|     @bracket_pairs =
 | |
|         map {ref $_ ? $_ :
 | |
|                 /!/ ? [(do {my $x = $_; $x =~ s/!/TOP/;    $x},
 | |
|                         do {my $x = $_; $x =~ s/!/BOTTOM/; $x})]
 | |
|                     : [(do {my $x = $_; $x =~ s/\?/LEFT/;  $x},
 | |
|                         do {my $x = $_; $x =~ s/\?/RIGHT/; $x})]}
 | |
|             "? PARENTHESIS",
 | |
|             "? SQUARE BRACKET",
 | |
|             "? CURLY BRACKET",
 | |
|             "? DOUBLE QUOTATION MARK",
 | |
|             "? SINGLE QUOTATION MARK",
 | |
|             "SINGLE ?-POINTING ANGLE QUOTATION MARK",
 | |
|             "?-POINTING DOUBLE ANGLE QUOTATION MARK",
 | |
|             "FULLWIDTH ? PARENTHESIS",
 | |
|             "FULLWIDTH ? SQUARE BRACKET",
 | |
|             "FULLWIDTH ? CURLY BRACKET",
 | |
|             "FULLWIDTH ? WHITE PARENTHESIS",
 | |
|             "? WHITE PARENTHESIS",
 | |
|             "? WHITE SQUARE BRACKET",
 | |
|             "? WHITE CURLY BRACKET",
 | |
|             "? CORNER BRACKET",
 | |
|             "? ANGLE BRACKET",
 | |
|             "? DOUBLE ANGLE BRACKET",
 | |
|             "? BLACK LENTICULAR BRACKET",
 | |
|             "? TORTOISE SHELL BRACKET",
 | |
|             "? BLACK TORTOISE SHELL BRACKET",
 | |
|             "? WHITE CORNER BRACKET",
 | |
|             "? WHITE LENTICULAR BRACKET",
 | |
|             "? WHITE TORTOISE SHELL BRACKET",
 | |
|             "HALFWIDTH ? CORNER BRACKET",
 | |
|             "MATHEMATICAL ? WHITE SQUARE BRACKET",
 | |
|             "MATHEMATICAL ? ANGLE BRACKET",
 | |
|             "MATHEMATICAL ? DOUBLE ANGLE BRACKET",
 | |
|             "MATHEMATICAL ? FLATTENED PARENTHESIS",
 | |
|             "MATHEMATICAL ? WHITE TORTOISE SHELL BRACKET",
 | |
|             "? CEILING",
 | |
|             "? FLOOR",
 | |
|             "Z NOTATION ? IMAGE BRACKET",
 | |
|             "Z NOTATION ? BINDING BRACKET",
 | |
|             [   "HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT",
 | |
|                 "HEAVY SINGLE " .   "COMMA QUOTATION MARK ORNAMENT", ],
 | |
|             [   "HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT",
 | |
|                 "HEAVY DOUBLE " .   "COMMA QUOTATION MARK ORNAMENT", ],
 | |
|             "MEDIUM ? PARENTHESIS ORNAMENT",
 | |
|             "MEDIUM FLATTENED ? PARENTHESIS ORNAMENT",
 | |
|             "MEDIUM ? CURLY BRACKET ORNAMENT",
 | |
|             "MEDIUM ?-POINTING ANGLE BRACKET ORNAMENT",
 | |
|             "HEAVY ?-POINTING ANGLE QUOTATION MARK ORNAMENT",
 | |
|             "HEAVY ?-POINTING ANGLE BRACKET ORNAMENT",
 | |
|             "LIGHT ? TORTOISE SHELL BRACKET ORNAMENT",
 | |
|             "ORNATE ? PARENTHESIS",
 | |
|             "! PARENTHESIS",
 | |
|             "! SQUARE BRACKET",
 | |
|             "! CURLY BRACKET",
 | |
|             "! TORTOISE SHELL BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? CORNER BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? WHITE CORNER BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? TORTOISE SHELL BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? BLACK LENTICULAR BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? WHITE LENTICULAR BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? ANGLE BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? DOUBLE ANGLE BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? SQUARE BRACKET",
 | |
|             "PRESENTATION FORM FOR VERTICAL ? CURLY BRACKET",
 | |
|             "?-POINTING ANGLE BRACKET",
 | |
|             "? ANGLE BRACKET WITH DOT",
 | |
|             "?-POINTING CURVED ANGLE BRACKET",
 | |
|             "SMALL ? PARENTHESIS",
 | |
|             "SMALL ? CURLY BRACKET",
 | |
|             "SMALL ? TORTOISE SHELL BRACKET",
 | |
|             "SUPERSCRIPT ? PARENTHESIS",
 | |
|             "SUBSCRIPT ? PARENTHESIS",
 | |
|             "? SQUARE BRACKET WITH UNDERBAR",
 | |
|             [    "LEFT SQUARE BRACKET WITH TICK IN TOP CORNER",
 | |
|                 "RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER", ],
 | |
|             [    "LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER",
 | |
|                 "RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER", ],
 | |
|             "? SQUARE BRACKET WITH QUILL",
 | |
|             "TOP ? HALF BRACKET",
 | |
|             "BOTTOM ? HALF BRACKET",
 | |
|             "? S-SHAPED BAG DELIMITER",
 | |
|             [    "LEFT ARC LESS-THAN BRACKET",
 | |
|                 "RIGHT ARC GREATER-THAN BRACKET",  ],
 | |
|             [    "DOUBLE LEFT ARC GREATER-THAN BRACKET",
 | |
|                 "DOUBLE RIGHT ARC LESS-THAN BRACKET",  ],
 | |
|             "? SIDEWAYS U BRACKET",
 | |
|             "? DOUBLE PARENTHESIS",
 | |
|             "? WIGGLY FENCE",
 | |
|             "? DOUBLE WIGGLY FENCE",
 | |
|             "? LOW PARAPHRASE BRACKET",
 | |
|             "? RAISED OMISSION BRACKET",
 | |
|             "? SUBSTITUTION BRACKET",
 | |
|             "? DOTTED SUBSTITUTION BRACKET",
 | |
|             "? TRANSPOSITION BRACKET",
 | |
|             [   "OGHAM FEATHER MARK",
 | |
|                 "OGHAM REVERSED FEATHER MARK",  ],
 | |
|             [   "TIBETAN MARK GUG RTAGS GYON",
 | |
|                 "TIBETAN MARK GUG RTAGS GYAS",  ],
 | |
|             [   "TIBETAN MARK ANG KHANG GYON",
 | |
|                 "TIBETAN MARK ANG KHANG GYAS",  ],
 | |
|     ;
 | |
| 
 | |
|     #
 | |
|     # Filter out unknown characters; this may run on an older version
 | |
|     # of Perl with an old version of Unicode.
 | |
|     #
 | |
|     @bracket_pairs = grep {defined charnames::string_vianame ($$_ [0]) &&
 | |
|                            defined charnames::string_vianame ($$_ [1])}
 | |
|                      @bracket_pairs;
 | |
| 
 | |
|     if (@bracket_pairs) {
 | |
|         my  $delims = join "" => map {charnames::string_vianame ($$_ [0])}
 | |
|                                      @bracket_pairs;
 | |
|         my $cdelims = join "" => map {charnames::string_vianame ($$_ [1])}
 | |
|                                      @bracket_pairs;
 | |
| 
 | |
|         pattern name   => [qw (bquoted -esc=\\)],
 | |
|                 create => sub {my $flags = $_ [1];
 | |
|                                return gen_delimited ($delims, $flags -> {-esc},
 | |
|                                                     $cdelims);
 | |
|                           },
 | |
|                 version => 5.014,
 | |
|                 ;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| #
 | |
| # Return the Unicode names of the pairs of matching delimiters.
 | |
| #
 | |
| sub bracket_pairs {@bracket_pairs}
 | |
| 
 | |
| 1;
 | |
| 
 | |
| __END__
 | |
| 
 | |
| =pod
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| Regexp::Common::delimited -- provides a regex for delimited strings
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
|     use Regexp::Common qw /delimited/;
 | |
| 
 | |
|     while (<>) {
 | |
|         /$RE{delimited}{-delim=>'"'}/  and print 'a \" delimited string';
 | |
|         /$RE{delimited}{-delim=>'/'}/  and print 'a \/ delimited string';
 | |
|     }
 | |
| 
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| Please consult the manual of L<Regexp::Common> for a general description
 | |
| of the works of this interface.
 | |
| 
 | |
| Do not use this module directly, but load it via I<Regexp::Common>.
 | |
| 
 | |
| =head2 C<$RE{delimited}{-delim}{-cdelim}{-esc}>
 | |
| 
 | |
| Returns a pattern that matches a single-character-delimited substring,
 | |
| with optional internal escaping of the delimiter.
 | |
| 
 | |
| When C<-delim => I<S>> is specified, each character in the sequence I<S> is
 | |
| a possible delimiter. There is no default delimiter, so this flag must always
 | |
| be specified.
 | |
| 
 | |
| By default, the closing delimiter is the same character as the opening
 | |
| delimiter. If this is not wanted, for instance, if you want to match
 | |
| a string with symmetric delimiters, you can specify the closing delimiter(s)
 | |
| with C<-cdelim => I<S>>. Each character in I<S> is matched with the
 | |
| corresponding character supplied with the C<-delim> option. If the C<-cdelim>
 | |
| option has less characters than the C<-delim> option, the last character
 | |
| is repeated as often as necessary. If the C<-cdelim> option has more
 | |
| characters than the C<-delim> option, the extra characters are ignored.
 | |
| 
 | |
| If C<-esc => I<S>> is specified, each character in the sequence I<S> is
 | |
| the delimiter for the corresponding character in the C<-delim=I<S>> list.
 | |
| The default escape is backslash.
 | |
| 
 | |
| For example:
 | |
| 
 | |
|    $RE{delimited}{-delim=>'"'}               # match "a \" delimited string"
 | |
|    $RE{delimited}{-delim=>'"'}{-esc=>'"'}    # match "a "" delimited string"
 | |
|    $RE{delimited}{-delim=>'/'}               # match /a \/ delimited string/
 | |
|    $RE{delimited}{-delim=>q{'"}}             # match "string" or 'string'
 | |
|    $RE{delimited}{-delim=>"("}{-cdelim=>")"} # match (string)
 | |
| 
 | |
| Under C<-keep> (See L<Regexp::Common>):
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item $1
 | |
| 
 | |
| captures the entire match
 | |
| 
 | |
| =item $2
 | |
| 
 | |
| captures the opening delimiter
 | |
| 
 | |
| =item $3
 | |
| 
 | |
| captures delimited portion of the string
 | |
| 
 | |
| =item $4
 | |
| 
 | |
| captures the closing delimiter
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head2 $RE{quoted}{-esc}
 | |
| 
 | |
| A synonym for C<< $RE {delimited} {-delim => q {'"`}} {...} >>.
 | |
| 
 | |
| =head2 $RE {bquoted} {-esc}
 | |
| 
 | |
| This is a pattern which matches delimited strings, where the delimiters
 | |
| are a set of matching brackets. Currently, this comes 85 pairs. This
 | |
| includes the 60 pairs of bidirection paired brackets, as listed
 | |
| in L<< http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt >>.
 | |
| 
 | |
| The other 25 pairs are the quotation marks, the double quotation
 | |
| marks, the single and double pointing quoation marks, the heavy
 | |
| single and double commas, 4 pairs of top-bottom parenthesis and
 | |
| brackets, 9 pairs of presentation form for vertical brackets,
 | |
| and the low paraphrase, raised omission, substitution, double
 | |
| substitution, and transposition brackets.
 | |
| 
 | |
| In a future update, pairs may be added (or deleted).
 | |
| 
 | |
| This pattern requires perl 5.14.0 or higher.
 | |
| 
 | |
| For a full list of bracket pairs, inspect the output of
 | |
| C<< Regexp::Common::delimited::bracket_pair () >>, which returns
 | |
| a list of two element arrays, each holding the Unicode names of
 | |
| matching pair of delimiters.
 | |
| 
 | |
| The C<< {-esc => I<S> } >> works as in the C<< $RE {delimited} >> pattern.
 | |
| 
 | |
| If C<< {-keep} >> is given, the following things will be captured:
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| =item $1
 | |
| 
 | |
| captures the entire match
 | |
| 
 | |
| =item $2
 | |
| 
 | |
| captures the opening delimiter
 | |
| 
 | |
| =item $3
 | |
| 
 | |
| captures delimited portion of the string
 | |
| 
 | |
| =item $4
 | |
| 
 | |
| captures the closing delimiter
 | |
| 
 | |
| =back
 | |
| 
 | |
| =head1 SEE ALSO
 | |
| 
 | |
| L<Regexp::Common> for a general description of how to use this interface.
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
| Damian Conway (damian@conway.org)
 | |
| 
 | |
| =head1 MAINTENANCE
 | |
| 
 | |
| This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
 | |
| 
 | |
| =head1 BUGS AND IRRITATIONS
 | |
| 
 | |
| Bound to be plenty.
 | |
| 
 | |
| For a start, there are many common regexes missing.
 | |
| Send them in to I<regexp-common@abigail.be>.
 | |
| 
 | |
| =head1 LICENSE and COPYRIGHT
 | |
| 
 | |
| This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
 | |
| 
 | |
| This module is free software, and maybe used under any of the following
 | |
| licenses:
 | |
| 
 | |
|  1) The Perl Artistic License.     See the file COPYRIGHT.AL.
 | |
|  2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
 | |
|  3) The BSD License.               See the file COPYRIGHT.BSD.
 | |
|  4) The MIT License.               See the file COPYRIGHT.MIT.
 | |
| 
 | |
| =cut
 | |
| EOD
 | |
| # 2}}}
 | |
|     my $problems        = 0;
 | |
|     $HAVE_Rexexp_Common = 0;
 | |
|     my $dir             = "";
 | |
|     if ($opt_sdir) {
 | |
|         ++$TEMP_OFF;
 | |
|         $dir = "$opt_sdir/$TEMP_OFF";
 | |
|         File::Path::rmtree($dir) if     is_dir($dir);
 | |
|         File::Path::mkpath($dir) unless is_dir($dir);
 | |
|     } else {
 | |
|         # let File::Temp create a suitable temporary directory
 | |
|         $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | |
|         $TEMP_INST{ $dir } = "Regexp::Common";
 | |
|     }
 | |
|     print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
 | |
|     my $Regexp_dir        = "$dir/Regexp";
 | |
|     my $Regexp_Common_dir = "$dir/Regexp/Common";
 | |
|     mkdir $Regexp_dir       ;
 | |
|     mkdir $Regexp_Common_dir;
 | |
| 
 | |
|     foreach my $module_file (keys %Regexp_Common_Contents) {
 | |
|         my $OUT = open_file('>', "$dir/Regexp/${module_file}.pm", 1);
 | |
|         if (defined $OUT) {
 | |
|             print $OUT $Regexp_Common_Contents{$module_file};
 | |
|             $OUT->close;
 | |
|         } else {
 | |
|             warn "Failed to install Regexp::${module_file}.pm\n";
 | |
|             $problems = 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     push @INC, $dir;
 | |
|     eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
 | |
|     $HAVE_Rexexp_Common = 1 unless $problems;
 | |
| } # 1}}}
 | |
| sub Install_Algorithm_Diff {                 # {{{1
 | |
|     # Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a
 | |
|     # temporary directory for the duration of this run.
 | |
| 
 | |
| my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
 | |
| package Algorithm::Diff;
 | |
| # Skip to first "=head" line for documentation.
 | |
| use strict;
 | |
| 
 | |
| use integer;    # see below in _replaceNextLargerWith() for mod to make
 | |
|                 # if you don't use this
 | |
| use vars qw( $VERSION @EXPORT_OK );
 | |
| $VERSION = 1.19_02;
 | |
| #          ^ ^^ ^^-- Incremented at will
 | |
| #          | \+----- Incremented for non-trivial changes to features
 | |
| #          \-------- Incremented for fundamental changes
 | |
| require Exporter;
 | |
| *import    = \&Exporter::import;
 | |
| @EXPORT_OK = qw(
 | |
|     prepare LCS LCSidx LCS_length
 | |
|     diff sdiff compact_diff
 | |
|     traverse_sequences traverse_balanced
 | |
| );
 | |
| 
 | |
| # McIlroy-Hunt diff algorithm
 | |
| # Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
 | |
| # by Ned Konz, perl@bike-nomad.com
 | |
| # Updates by Tye McQueen, http://perlmonks.org/?node=tye
 | |
| 
 | |
| # Create a hash that maps each element of $aCollection to the set of
 | |
| # positions it occupies in $aCollection, restricted to the elements
 | |
| # within the range of indexes specified by $start and $end.
 | |
| # The fourth parameter is a subroutine reference that will be called to
 | |
| # generate a string to use as a key.
 | |
| # Additional parameters, if any, will be passed to this subroutine.
 | |
| #
 | |
| # my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
 | |
| 
 | |
| sub _withPositionsOfInInterval
 | |
| {
 | |
|     my $aCollection = shift;    # array ref
 | |
|     my $start       = shift;
 | |
|     my $end         = shift;
 | |
|     my $keyGen      = shift;
 | |
|     my %d;
 | |
|     my $index;
 | |
|     for ( $index = $start ; $index <= $end ; $index++ )
 | |
|     {
 | |
|         my $element = $aCollection->[$index];
 | |
|         my $key = &$keyGen( $element, @_ );
 | |
|         if ( exists( $d{$key} ) )
 | |
|         {
 | |
|             unshift ( @{ $d{$key} }, $index );
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             $d{$key} = [$index];
 | |
|         }
 | |
|     }
 | |
|     return wantarray ? %d : \%d;
 | |
| }
 | |
| 
 | |
| # Find the place at which aValue would normally be inserted into the
 | |
| # array. If that place is already occupied by aValue, do nothing, and
 | |
| # return undef. If the place does not exist (i.e., it is off the end of
 | |
| # the array), add it to the end, otherwise replace the element at that
 | |
| # point with aValue.  It is assumed that the array's values are numeric.
 | |
| # This is where the bulk (75%) of the time is spent in this module, so
 | |
| # try to make it fast!
 | |
| 
 | |
| sub _replaceNextLargerWith
 | |
| {
 | |
|     my ( $array, $aValue, $high ) = @_;
 | |
|     $high ||= $#$array;
 | |
| 
 | |
|     # off the end?
 | |
|     if ( $high == -1 || $aValue > $array->[-1] )
 | |
|     {
 | |
|         push ( @$array, $aValue );
 | |
|         return $high + 1;
 | |
|     }
 | |
| 
 | |
|     # binary search for insertion point...
 | |
|     my $low = 0;
 | |
|     my $index;
 | |
|     my $found;
 | |
|     while ( $low <= $high )
 | |
|     {
 | |
|         $index = ( $high + $low ) / 2;
 | |
| 
 | |
|         # $index = int(( $high + $low ) / 2);  # without 'use integer'
 | |
|         $found = $array->[$index];
 | |
| 
 | |
|         if ( $aValue == $found )
 | |
|         {
 | |
|             return undef;
 | |
|         }
 | |
|         elsif ( $aValue > $found )
 | |
|         {
 | |
|             $low = $index + 1;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             $high = $index - 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # now insertion point is in $low.
 | |
|     $array->[$low] = $aValue;    # overwrite next larger
 | |
|     return $low;
 | |
| }
 | |
| 
 | |
| # This method computes the longest common subsequence in $a and $b.
 | |
| 
 | |
| # Result is array or ref, whose contents is such that
 | |
| #   $a->[ $i ] == $b->[ $result[ $i ] ]
 | |
| # foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
 | |
| 
 | |
| # An additional argument may be passed; this is a hash or key generating
 | |
| # function that should return a string that uniquely identifies the given
 | |
| # element.  It should be the case that if the key is the same, the elements
 | |
| # will compare the same. If this parameter is undef or missing, the key
 | |
| # will be the element as a string.
 | |
| 
 | |
| # By default, comparisons will use "eq" and elements will be turned into keys
 | |
| # using the default stringizing operator '""'.
 | |
| 
 | |
| # Additional parameters, if any, will be passed to the key generation
 | |
| # routine.
 | |
| 
 | |
| sub _longestCommonSubsequence
 | |
| {
 | |
|     my $a        = shift;    # array ref or hash ref
 | |
|     my $b        = shift;    # array ref or hash ref
 | |
|     my $counting = shift;    # scalar
 | |
|     my $keyGen   = shift;    # code ref
 | |
|     my $compare;             # code ref
 | |
| 
 | |
|     if ( ref($a) eq 'HASH' )
 | |
|     {                        # prepared hash must be in $b
 | |
|         my $tmp = $b;
 | |
|         $b = $a;
 | |
|         $a = $tmp;
 | |
|     }
 | |
| 
 | |
|     # Check for bogus (non-ref) argument values
 | |
|     if ( !ref($a) || !ref($b) )
 | |
|     {
 | |
|         my @callerInfo = caller(1);
 | |
|         die 'error: must pass array or hash references to ' . $callerInfo[3];
 | |
|     }
 | |
| 
 | |
|     # set up code refs
 | |
|     # Note that these are optimized.
 | |
|     if ( !defined($keyGen) )    # optimize for strings
 | |
|     {
 | |
|         $keyGen = sub { $_[0] };
 | |
|         $compare = sub { my ( $a, $b ) = @_; $a eq $b };
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         $compare = sub {
 | |
|             my $a = shift;
 | |
|             my $b = shift;
 | |
|             &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
 | |
|         };
 | |
|     }
 | |
| 
 | |
|     my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
 | |
|     my ( $prunedCount, $bMatches ) = ( 0, {} );
 | |
| 
 | |
|     if ( ref($b) eq 'HASH' )    # was $bMatches prepared for us?
 | |
|     {
 | |
|         $bMatches = $b;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         my ( $bStart, $bFinish ) = ( 0, $#$b );
 | |
| 
 | |
|         # First we prune off any common elements at the beginning
 | |
|         while ( $aStart <= $aFinish
 | |
|             and $bStart <= $bFinish
 | |
|             and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
 | |
|         {
 | |
|             $matchVector->[ $aStart++ ] = $bStart++;
 | |
|             $prunedCount++;
 | |
|         }
 | |
| 
 | |
|         # now the end
 | |
|         while ( $aStart <= $aFinish
 | |
|             and $bStart <= $bFinish
 | |
|             and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
 | |
|         {
 | |
|             $matchVector->[ $aFinish-- ] = $bFinish--;
 | |
|             $prunedCount++;
 | |
|         }
 | |
| 
 | |
|         # Now compute the equivalence classes of positions of elements
 | |
|         $bMatches =
 | |
|           _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
 | |
|     }
 | |
|     my $thresh = [];
 | |
|     my $links  = [];
 | |
| 
 | |
|     my ( $i, $ai, $j, $k );
 | |
|     for ( $i = $aStart ; $i <= $aFinish ; $i++ )
 | |
|     {
 | |
|         $ai = &$keyGen( $a->[$i], @_ );
 | |
|         if ( exists( $bMatches->{$ai} ) )
 | |
|         {
 | |
|             $k = 0;
 | |
|             for $j ( @{ $bMatches->{$ai} } )
 | |
|             {
 | |
| 
 | |
|                 # optimization: most of the time this will be true
 | |
|                 if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
 | |
|                 {
 | |
|                     $thresh->[$k] = $j;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     $k = _replaceNextLargerWith( $thresh, $j, $k );
 | |
|                 }
 | |
| 
 | |
|                 # oddly, it's faster to always test this (CPU cache?).
 | |
|                 if ( defined($k) )
 | |
|                 {
 | |
|                     $links->[$k] =
 | |
|                       [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (@$thresh)
 | |
|     {
 | |
|         return $prunedCount + @$thresh if $counting;
 | |
|         for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
 | |
|         {
 | |
|             $matchVector->[ $link->[1] ] = $link->[2];
 | |
|         }
 | |
|     }
 | |
|     elsif ($counting)
 | |
|     {
 | |
|         return $prunedCount;
 | |
|     }
 | |
| 
 | |
|     return wantarray ? @$matchVector : $matchVector;
 | |
| }
 | |
| 
 | |
| sub traverse_sequences
 | |
| {
 | |
|     my $a                 = shift;          # array ref
 | |
|     my $b                 = shift;          # array ref
 | |
|     my $callbacks         = shift || {};
 | |
|     my $keyGen            = shift;
 | |
|     my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 | |
|     my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 | |
|     my $finishedACallback = $callbacks->{'A_FINISHED'};
 | |
|     my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 | |
|     my $finishedBCallback = $callbacks->{'B_FINISHED'};
 | |
|     my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
 | |
| 
 | |
|     # Process all the lines in @$matchVector
 | |
|     my $lastA = $#$a;
 | |
|     my $lastB = $#$b;
 | |
|     my $bi    = 0;
 | |
|     my $ai;
 | |
| 
 | |
|     for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
 | |
|     {
 | |
|         my $bLine = $matchVector->[$ai];
 | |
|         if ( defined($bLine) )    # matched
 | |
|         {
 | |
|             &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
 | |
|             &$matchCallback( $ai,    $bi++, @_ );
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             &$discardACallback( $ai, $bi, @_ );
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     # The last entry (if any) processed was a match.
 | |
|     # $ai and $bi point just past the last matching lines in their sequences.
 | |
| 
 | |
|     while ( $ai <= $lastA or $bi <= $lastB )
 | |
|     {
 | |
| 
 | |
|         # last A?
 | |
|         if ( $ai == $lastA + 1 and $bi <= $lastB )
 | |
|         {
 | |
|             if ( defined($finishedACallback) )
 | |
|             {
 | |
|                 &$finishedACallback( $lastA, @_ );
 | |
|                 $finishedACallback = undef;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         # last B?
 | |
|         if ( $bi == $lastB + 1 and $ai <= $lastA )
 | |
|         {
 | |
|             if ( defined($finishedBCallback) )
 | |
|             {
 | |
|                 &$finishedBCallback( $lastB, @_ );
 | |
|                 $finishedBCallback = undef;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
 | |
|         &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
 | |
|     }
 | |
| 
 | |
|     return 1;
 | |
| }
 | |
| 
 | |
| sub traverse_balanced
 | |
| {
 | |
|     my $a                 = shift;              # array ref
 | |
|     my $b                 = shift;              # array ref
 | |
|     my $callbacks         = shift || {};
 | |
|     my $keyGen            = shift;
 | |
|     my $matchCallback     = $callbacks->{'MATCH'} || sub { };
 | |
|     my $discardACallback  = $callbacks->{'DISCARD_A'} || sub { };
 | |
|     my $discardBCallback  = $callbacks->{'DISCARD_B'} || sub { };
 | |
|     my $changeCallback    = $callbacks->{'CHANGE'};
 | |
|     my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
 | |
| 
 | |
|     # Process all the lines in match vector
 | |
|     my $lastA = $#$a;
 | |
|     my $lastB = $#$b;
 | |
|     my $bi    = 0;
 | |
|     my $ai    = 0;
 | |
|     my $ma    = -1;
 | |
|     my $mb;
 | |
| 
 | |
|     while (1)
 | |
|     {
 | |
| 
 | |
|         # Find next match indices $ma and $mb
 | |
|         do {
 | |
|             $ma++;
 | |
|         } while(
 | |
|                 $ma <= $#$matchVector
 | |
|             &&  !defined $matchVector->[$ma]
 | |
|         );
 | |
| 
 | |
|         last if $ma > $#$matchVector;    # end of matchVector?
 | |
|         $mb = $matchVector->[$ma];
 | |
| 
 | |
|         # Proceed with discard a/b or change events until
 | |
|         # next match
 | |
|         while ( $ai < $ma || $bi < $mb )
 | |
|         {
 | |
| 
 | |
|             if ( $ai < $ma && $bi < $mb )
 | |
|             {
 | |
| 
 | |
|                 # Change
 | |
|                 if ( defined $changeCallback )
 | |
|                 {
 | |
|                     &$changeCallback( $ai++, $bi++, @_ );
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     &$discardACallback( $ai++, $bi, @_ );
 | |
|                     &$discardBCallback( $ai, $bi++, @_ );
 | |
|                 }
 | |
|             }
 | |
|             elsif ( $ai < $ma )
 | |
|             {
 | |
|                 &$discardACallback( $ai++, $bi, @_ );
 | |
|             }
 | |
|             else
 | |
|             {
 | |
| 
 | |
|                 # $bi < $mb
 | |
|                 &$discardBCallback( $ai, $bi++, @_ );
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         # Match
 | |
|         &$matchCallback( $ai++, $bi++, @_ );
 | |
|     }
 | |
| 
 | |
|     while ( $ai <= $lastA || $bi <= $lastB )
 | |
|     {
 | |
|         if ( $ai <= $lastA && $bi <= $lastB )
 | |
|         {
 | |
| 
 | |
|             # Change
 | |
|             if ( defined $changeCallback )
 | |
|             {
 | |
|                 &$changeCallback( $ai++, $bi++, @_ );
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 &$discardACallback( $ai++, $bi, @_ );
 | |
|                 &$discardBCallback( $ai, $bi++, @_ );
 | |
|             }
 | |
|         }
 | |
|         elsif ( $ai <= $lastA )
 | |
|         {
 | |
|             &$discardACallback( $ai++, $bi, @_ );
 | |
|         }
 | |
|         else
 | |
|         {
 | |
| 
 | |
|             # $bi <= $lastB
 | |
|             &$discardBCallback( $ai, $bi++, @_ );
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return 1;
 | |
| }
 | |
| 
 | |
| sub prepare
 | |
| {
 | |
|     my $a       = shift;    # array ref
 | |
|     my $keyGen  = shift;    # code ref
 | |
| 
 | |
|     # set up code ref
 | |
|     $keyGen = sub { $_[0] } unless defined($keyGen);
 | |
| 
 | |
|     return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
 | |
| }
 | |
| 
 | |
| sub LCS
 | |
| {
 | |
|     my $a = shift;                  # array ref
 | |
|     my $b = shift;                  # array ref or hash ref
 | |
|     my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
 | |
|     my @retval;
 | |
|     my $i;
 | |
|     for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
 | |
|     {
 | |
|         if ( defined( $matchVector->[$i] ) )
 | |
|         {
 | |
|             push ( @retval, $a->[$i] );
 | |
|         }
 | |
|     }
 | |
|     return wantarray ? @retval : \@retval;
 | |
| }
 | |
| 
 | |
| sub LCS_length
 | |
| {
 | |
|     my $a = shift;                          # array ref
 | |
|     my $b = shift;                          # array ref or hash ref
 | |
|     return _longestCommonSubsequence( $a, $b, 1, @_ );
 | |
| }
 | |
| 
 | |
| sub LCSidx
 | |
| {
 | |
|     my $a= shift @_;
 | |
|     my $b= shift @_;
 | |
|     my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
 | |
|     my @am= grep defined $match->[$_], 0..$#$match;
 | |
|     my @bm= @{$match}[@am];
 | |
|     return \@am, \@bm;
 | |
| }
 | |
| 
 | |
| sub compact_diff
 | |
| {
 | |
|     my $a= shift @_;
 | |
|     my $b= shift @_;
 | |
|     my( $am, $bm )= LCSidx( $a, $b, @_ );
 | |
|     my @cdiff;
 | |
|     my( $ai, $bi )= ( 0, 0 );
 | |
|     push @cdiff, $ai, $bi;
 | |
|     while( 1 ) {
 | |
|         while(  @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0]  ) {
 | |
|             shift @$am;
 | |
|             shift @$bm;
 | |
|             ++$ai, ++$bi;
 | |
|         }
 | |
|         push @cdiff, $ai, $bi;
 | |
|         last   if  ! @$am;
 | |
|         $ai = $am->[0];
 | |
|         $bi = $bm->[0];
 | |
|         push @cdiff, $ai, $bi;
 | |
|     }
 | |
|     push @cdiff, 0+@$a, 0+@$b
 | |
|         if  $ai < @$a || $bi < @$b;
 | |
|     return wantarray ? @cdiff : \@cdiff;
 | |
| }
 | |
| 
 | |
| sub diff
 | |
| {
 | |
|     my $a      = shift;    # array ref
 | |
|     my $b      = shift;    # array ref
 | |
|     my $retval = [];
 | |
|     my $hunk   = [];
 | |
|     my $discard = sub {
 | |
|         push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
 | |
|     };
 | |
|     my $add = sub {
 | |
|         push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
 | |
|     };
 | |
|     my $match = sub {
 | |
|         push @$retval, $hunk
 | |
|             if 0 < @$hunk;
 | |
|         $hunk = []
 | |
|     };
 | |
|     traverse_sequences( $a, $b,
 | |
|         { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
 | |
|     &$match();
 | |
|     return wantarray ? @$retval : $retval;
 | |
| }
 | |
| 
 | |
| sub sdiff
 | |
| {
 | |
|     my $a      = shift;    # array ref
 | |
|     my $b      = shift;    # array ref
 | |
|     my $retval = [];
 | |
|     my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
 | |
|     my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
 | |
|     my $change = sub {
 | |
|         push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 | |
|     };
 | |
|     my $match = sub {
 | |
|         push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
 | |
|     };
 | |
|     traverse_balanced(
 | |
|         $a,
 | |
|         $b,
 | |
|         {
 | |
|             MATCH     => $match,
 | |
|             DISCARD_A => $discard,
 | |
|             DISCARD_B => $add,
 | |
|             CHANGE    => $change,
 | |
|         },
 | |
|         @_
 | |
|     );
 | |
|     return wantarray ? @$retval : $retval;
 | |
| }
 | |
| 
 | |
| ########################################
 | |
| my $Root= __PACKAGE__;
 | |
| package Algorithm::Diff::_impl;
 | |
| use strict;
 | |
| 
 | |
| sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
 | |
|             # 1   # $me->[1]: Ref to first sequence
 | |
|             # 2   # $me->[2]: Ref to second sequence
 | |
| sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
 | |
| sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
 | |
| sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
 | |
| sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
 | |
| sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
 | |
| sub _Min() { -2 } # Added to _Off to get min instead of max+1
 | |
| 
 | |
| sub Die
 | |
| {
 | |
|     require Carp;
 | |
|     Carp::confess( @_ );
 | |
| }
 | |
| 
 | |
| sub _ChkPos
 | |
| {
 | |
|     my( $me )= @_;
 | |
|     return   if  $me->[_Pos];
 | |
|     my $meth= ( caller(1) )[3];
 | |
|     Die( "Called $meth on 'reset' object" );
 | |
| }
 | |
| 
 | |
| sub _ChkSeq
 | |
| {
 | |
|     my( $me, $seq )= @_;
 | |
|     return $seq + $me->[_Off]
 | |
|         if  1 == $seq  ||  2 == $seq;
 | |
|     my $meth= ( caller(1) )[3];
 | |
|     Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
 | |
| }
 | |
| 
 | |
| sub getObjPkg
 | |
| {
 | |
|     my( $us )= @_;
 | |
|     return ref $us   if  ref $us;
 | |
|     return $us . "::_obj";
 | |
| }
 | |
| 
 | |
| sub new
 | |
| {
 | |
|     my( $us, $seq1, $seq2, $opts ) = @_;
 | |
|     my @args;
 | |
|     for( $opts->{keyGen} ) {
 | |
|         push @args, $_   if  $_;
 | |
|     }
 | |
|     for( $opts->{keyGenArgs} ) {
 | |
|         push @args, @$_   if  $_;
 | |
|     }
 | |
|     my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
 | |
|     my $same= 1;
 | |
|     if(  0 == $cdif->[2]  &&  0 == $cdif->[3]  ) {
 | |
|         $same= 0;
 | |
|         splice @$cdif, 0, 2;
 | |
|     }
 | |
|     my @obj= ( $cdif, $seq1, $seq2 );
 | |
|     $obj[_End] = (1+@$cdif)/2;
 | |
|     $obj[_Same] = $same;
 | |
|     $obj[_Base] = 0;
 | |
|     my $me = bless \@obj, $us->getObjPkg();
 | |
|     $me->Reset( 0 );
 | |
|     return $me;
 | |
| }
 | |
| 
 | |
| sub Reset
 | |
| {
 | |
|     my( $me, $pos )= @_;
 | |
|     $pos= int( $pos || 0 );
 | |
|     $pos += $me->[_End]
 | |
|         if  $pos < 0;
 | |
|     $pos= 0
 | |
|         if  $pos < 0  ||  $me->[_End] <= $pos;
 | |
|     $me->[_Pos]= $pos || !1;
 | |
|     $me->[_Off]= 2*$pos - 1;
 | |
|     return $me;
 | |
| }
 | |
| 
 | |
| sub Base
 | |
| {
 | |
|     my( $me, $base )= @_;
 | |
|     my $oldBase= $me->[_Base];
 | |
|     $me->[_Base]= 0+$base   if  defined $base;
 | |
|     return $oldBase;
 | |
| }
 | |
| 
 | |
| sub Copy
 | |
| {
 | |
|     my( $me, $pos, $base )= @_;
 | |
|     my @obj= @$me;
 | |
|     my $you= bless \@obj, ref($me);
 | |
|     $you->Reset( $pos )   if  defined $pos;
 | |
|     $you->Base( $base );
 | |
|     return $you;
 | |
| }
 | |
| 
 | |
| sub Next {
 | |
|     my( $me, $steps )= @_;
 | |
|     $steps= 1   if  ! defined $steps;
 | |
|     if( $steps ) {
 | |
|         my $pos= $me->[_Pos];
 | |
|         my $new= $pos + $steps;
 | |
|         $new= 0   if  $pos  &&  $new < 0;
 | |
|         $me->Reset( $new )
 | |
|     }
 | |
|     return $me->[_Pos];
 | |
| }
 | |
| 
 | |
| sub Prev {
 | |
|     my( $me, $steps )= @_;
 | |
|     $steps= 1   if  ! defined $steps;
 | |
|     my $pos= $me->Next(-$steps);
 | |
|     $pos -= $me->[_End]   if  $pos;
 | |
|     return $pos;
 | |
| }
 | |
| 
 | |
| sub Diff {
 | |
|     my( $me )= @_;
 | |
|     $me->_ChkPos();
 | |
|     return 0   if  $me->[_Same] == ( 1 & $me->[_Pos] );
 | |
|     my $ret= 0;
 | |
|     my $off= $me->[_Off];
 | |
|     for my $seq ( 1, 2 ) {
 | |
|         $ret |= $seq
 | |
|             if  $me->[_Idx][ $off + $seq + _Min ]
 | |
|             <   $me->[_Idx][ $off + $seq ];
 | |
|     }
 | |
|     return $ret;
 | |
| }
 | |
| 
 | |
| sub Min {
 | |
|     my( $me, $seq, $base )= @_;
 | |
|     $me->_ChkPos();
 | |
|     my $off= $me->_ChkSeq($seq);
 | |
|     $base= $me->[_Base] if !defined $base;
 | |
|     return $base + $me->[_Idx][ $off + _Min ];
 | |
| }
 | |
| 
 | |
| sub Max {
 | |
|     my( $me, $seq, $base )= @_;
 | |
|     $me->_ChkPos();
 | |
|     my $off= $me->_ChkSeq($seq);
 | |
|     $base= $me->[_Base] if !defined $base;
 | |
|     return $base + $me->[_Idx][ $off ] -1;
 | |
| }
 | |
| 
 | |
| sub Range {
 | |
|     my( $me, $seq, $base )= @_;
 | |
|     $me->_ChkPos();
 | |
|     my $off = $me->_ChkSeq($seq);
 | |
|     if( !wantarray ) {
 | |
|         return  $me->[_Idx][ $off ]
 | |
|             -   $me->[_Idx][ $off + _Min ];
 | |
|     }
 | |
|     $base= $me->[_Base] if !defined $base;
 | |
|     return  ( $base + $me->[_Idx][ $off + _Min ] )
 | |
|         ..  ( $base + $me->[_Idx][ $off ] - 1 );
 | |
| }
 | |
| 
 | |
| sub Items {
 | |
|     my( $me, $seq )= @_;
 | |
|     $me->_ChkPos();
 | |
|     my $off = $me->_ChkSeq($seq);
 | |
|     if( !wantarray ) {
 | |
|         return  $me->[_Idx][ $off ]
 | |
|             -   $me->[_Idx][ $off + _Min ];
 | |
|     }
 | |
|     return
 | |
|         @{$me->[$seq]}[
 | |
|                 $me->[_Idx][ $off + _Min ]
 | |
|             ..  ( $me->[_Idx][ $off ] - 1 )
 | |
|         ];
 | |
| }
 | |
| 
 | |
| sub Same {
 | |
|     my( $me )= @_;
 | |
|     $me->_ChkPos();
 | |
|     return wantarray ? () : 0
 | |
|         if  $me->[_Same] != ( 1 & $me->[_Pos] );
 | |
|     return $me->Items(1);
 | |
| }
 | |
| 
 | |
| my %getName;
 | |
| BEGIN {
 | |
|     %getName= (
 | |
|         same => \&Same,
 | |
|         diff => \&Diff,
 | |
|         base => \&Base,
 | |
|         min  => \&Min,
 | |
|         max  => \&Max,
 | |
|         range=> \&Range,
 | |
|         items=> \&Items, # same thing
 | |
|     );
 | |
| }
 | |
| 
 | |
| sub Get
 | |
| {
 | |
|     my $me= shift @_;
 | |
|     $me->_ChkPos();
 | |
|     my @value;
 | |
|     for my $arg (  @_  ) {
 | |
|         for my $word (  split ' ', $arg  ) {
 | |
|             my $meth;
 | |
|             if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
 | |
|                 ||  not  $meth= $getName{ lc $2 }
 | |
|             ) {
 | |
|                 Die( $Root, ", Get: Invalid request ($word)" );
 | |
|             }
 | |
|             my( $base, $name, $seq )= ( $1, $2, $3 );
 | |
|             push @value, scalar(
 | |
|                 4 == length($name)
 | |
|                     ? $meth->( $me )
 | |
|                     : $meth->( $me, $seq, $base )
 | |
|             );
 | |
|         }
 | |
|     }
 | |
|     if(  wantarray  ) {
 | |
|         return @value;
 | |
|     } elsif(  1 == @value  ) {
 | |
|         return $value[0];
 | |
|     }
 | |
|     Die( 0+@value, " values requested from ",
 | |
|         $Root, "'s Get in scalar context" );
 | |
| }
 | |
| 
 | |
| 
 | |
| my $Obj= getObjPkg($Root);
 | |
| no strict 'refs';
 | |
| 
 | |
| for my $meth (  qw( new getObjPkg )  ) {
 | |
|     *{$Root."::".$meth} = \&{$meth};
 | |
|     *{$Obj ."::".$meth} = \&{$meth};
 | |
| }
 | |
| for my $meth (  qw(
 | |
|     Next Prev Reset Copy Base Diff
 | |
|     Same Items Range Min Max Get
 | |
|     _ChkPos _ChkSeq
 | |
| )  ) {
 | |
|     *{$Obj."::".$meth} = \&{$meth};
 | |
| }
 | |
| 
 | |
| 1;
 | |
| # This version released by Tye McQueen (http://perlmonks.org/?node=tye).
 | |
| #
 | |
| # =head1 LICENSE
 | |
| #
 | |
| # Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
 | |
| # Parts by Tye McQueen.
 | |
| #
 | |
| # This program is free software; you can redistribute it and/or modify it
 | |
| # under the same terms as Perl.
 | |
| #
 | |
| # =head1 MAILING LIST
 | |
| #
 | |
| # Mark-Jason still maintains a mailing list.  To join a low-volume mailing
 | |
| # list for announcements related to diff and Algorithm::Diff, send an
 | |
| # empty mail message to mjd-perl-diff-request@plover.com.
 | |
| # =head1 CREDITS
 | |
| #
 | |
| # Versions through 0.59 (and much of this documentation) were written by:
 | |
| #
 | |
| # Mark-Jason Dominus, mjd-perl-diff@plover.com
 | |
| #
 | |
| # This version borrows some documentation and routine names from
 | |
| # Mark-Jason's, but Diff.pm's code was completely replaced.
 | |
| #
 | |
| # This code was adapted from the Smalltalk code of Mario Wolczko
 | |
| # <mario@wolczko.com>, which is available at
 | |
| # ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
 | |
| #
 | |
| # C<sdiff> and C<traverse_balanced> were written by Mike Schilli
 | |
| # <m@perlmeister.com>.
 | |
| #
 | |
| # The algorithm is that described in
 | |
| # I<A Fast Algorithm for Computing Longest Common Subsequences>,
 | |
| # CACM, vol.20, no.5, pp.350-353, May 1977, with a few
 | |
| # minor improvements to improve the speed.
 | |
| #
 | |
| # Much work was done by Ned Konz (perl@bike-nomad.com).
 | |
| #
 | |
| # The OO interface and some other changes are by Tye McQueen.
 | |
| #
 | |
| EOAlgDiff
 | |
| # 2}}}
 | |
|     my $problems        = 0;
 | |
|     $HAVE_Algorithm_Diff = 0;
 | |
|     my $dir             = "";
 | |
|     if ($opt_sdir) {
 | |
|         ++$TEMP_OFF;
 | |
|         $dir = "$opt_sdir/$TEMP_OFF";
 | |
|         File::Path::rmtree($dir) if     is_dir($dir);
 | |
|         File::Path::mkpath($dir) unless is_dir($dir);
 | |
|     } else {
 | |
|         # let File::Temp create a suitable temporary directory
 | |
|         $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
 | |
|         $TEMP_INST{ $dir } = "Algorithm::Diff";
 | |
|     }
 | |
|     print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
 | |
|     my $Algorithm_dir      = "$dir/Algorithm";
 | |
|     my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
 | |
|     mkdir $Algorithm_dir     ;
 | |
|     mkdir $Algorithm_Diff_dir;
 | |
| 
 | |
|     my $OUT = open_file('>', "$dir/Algorithm/Diff.pm", 1);
 | |
|     if (defined $OUT) {
 | |
|         print $OUT $Algorithm_Diff_Contents;
 | |
|         $OUT->close;
 | |
|     } else {
 | |
|         warn "Failed to install Algorithm/Diff.pm\n";
 | |
|         $problems = 1;
 | |
|     }
 | |
| 
 | |
|     push @INC, $dir;  # between this & Regexp::Common only need to do once
 | |
|     eval "use Algorithm::Diff qw / sdiff /";
 | |
|     $HAVE_Algorithm_Diff = 1 unless $problems;
 | |
| } # 1}}}
 | |
| __END__
 | |
| mode values (stat $item)[2]
 | |
|        Unix    Windows
 | |
| file:  33188   33206
 | |
| dir :  16832   16895
 | |
| link:  33261   33206
 | |
| pipe:   4544    null
 | 
