#!/usr/bin/env perl
# cloc -- Count Lines of Code {{{1
# Copyright (C) 2006-2024 Al Danial <al.danial@gmail.com>
# First release August 2006
#
# Includes code from:
#   - SLOCCount v2.26
#     http://www.dwheeler.com/sloccount/
#     by David Wheeler.
#   - Regexp::Common v2017060201
#     https://metacpan.org/pod/Regexp::Common
#     by Damian Conway and Abigail.
#   - Win32::Autoglob 1.01
#     https://metacpan.org/pod/Win32::Autoglob
#     by Sean M. Burke.
#   - Algorithm::Diff 1.1902
#     https://metacpan.org/pod/Algorithm::Diff
#     by Tye McQueen.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details:
# <http://www.gnu.org/licenses/gpl.txt>.
#
# 1}}}
my $VERSION = "2.01"; # odd number == beta; even number == stable
my $URL = "github.com/AlDanial/cloc"; # 'https://' pushes header too wide
require 5.10.0;
# use modules {{{1
use warnings;
use strict;

use Getopt::Long;
use File::Basename;
use File::Temp qw { tempfile tempdir };
use File::Find;
use File::Path;
use File::Spec;
use IO::File;
use List::Util qw( min max );
use Cwd;
use POSIX qw { strftime ceil};
# Parallel::ForkManager isn't in the standard distribution.
# Use it only if installed, and only if --processes=N is given.
# The module load happens in get_max_processes().
my $HAVE_Parallel_ForkManager = 0;

# Digest::MD5 isn't in the standard distribution. Use it only if installed.
my $HAVE_Digest_MD5 = 0;
eval "use Digest::MD5;";
if (defined $Digest::MD5::VERSION) {
    $HAVE_Digest_MD5 = 1;
} else {
    warn "Digest::MD5 not installed; will skip file uniqueness checks.\n";
}

# Time::HiRes became standard with Perl 5.8
my $HAVE_Time_HiRes = 0;
eval "use Time::HiRes;";
$HAVE_Time_HiRes = 1 if defined $Time::HiRes::VERSION;

my $HAVE_Rexexp_Common;
# Regexp::Common isn't in the standard distribution. It will
# be installed in a temp directory if necessary.
eval "use Regexp::Common qw ( comment ) ";
if (defined $Regexp::Common::VERSION) {
    $HAVE_Rexexp_Common = 1;
} else {
    $HAVE_Rexexp_Common = 0;
}

my $HAVE_Algorithm_Diff = 0;
# Algorithm::Diff isn't in the standard distribution. It will
# be installed in a temp directory if necessary.
eval "use Algorithm::Diff qw ( sdiff ) ";
if (defined $Algorithm::Diff::VERSION) {
    $HAVE_Algorithm_Diff = 1;
} else {
    Install_Algorithm_Diff();
}

# print "2 HAVE_Algorithm_Diff = $HAVE_Algorithm_Diff\n";
# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die;
# die "Hre=$HAVE_Rexexp_Common Had=$HAVE_Algorithm_Diff";

# Uncomment next two lines when building Windows executable with perl2exe
# or if running on a system that already has Regexp::Common.
#use Regexp::Common;
#$HAVE_Rexexp_Common = 1;

#perl2exe_include "Regexp/Common/whitespace.pm"
#perl2exe_include "Regexp/Common/URI.pm"
#perl2exe_include "Regexp/Common/URI/fax.pm"
#perl2exe_include "Regexp/Common/URI/file.pm"
#perl2exe_include "Regexp/Common/URI/ftp.pm"
#perl2exe_include "Regexp/Common/URI/gopher.pm"
#perl2exe_include "Regexp/Common/URI/http.pm"
#perl2exe_include "Regexp/Common/URI/pop.pm"
#perl2exe_include "Regexp/Common/URI/prospero.pm"
#perl2exe_include "Regexp/Common/URI/news.pm"
#perl2exe_include "Regexp/Common/URI/tel.pm"
#perl2exe_include "Regexp/Common/URI/telnet.pm"
#perl2exe_include "Regexp/Common/URI/tv.pm"
#perl2exe_include "Regexp/Common/URI/wais.pm"
#perl2exe_include "Regexp/Common/CC.pm"
#perl2exe_include "Regexp/Common/SEN.pm"
#perl2exe_include "Regexp/Common/number.pm"
#perl2exe_include "Regexp/Common/delimited.pm"
#perl2exe_include "Regexp/Common/profanity.pm"
#perl2exe_include "Regexp/Common/net.pm"
#perl2exe_include "Regexp/Common/zip.pm"
#perl2exe_include "Regexp/Common/comment.pm"
#perl2exe_include "Regexp/Common/balanced.pm"
#perl2exe_include "Regexp/Common/lingua.pm"
#perl2exe_include "Regexp/Common/list.pm"
#perl2exe_include "File/Glob.pm"

use Text::Tabs qw { expand };
use Cwd qw { cwd };
use File::Glob;
# 1}}}
# Usage information, options processing. {{{1
my $ON_WINDOWS = 0;
$ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT");
if ($ON_WINDOWS and $ENV{'SHELL'}) {
    if ($ENV{'SHELL'} =~ m{^/}) {
        $ON_WINDOWS = 0; # make Cygwin look like Unix
    } else {
        $ON_WINDOWS = 1; # MKS defines $SHELL but still acts like Windows
    }
}

my $HAVE_Win32_Long_Path = 0;
# Win32::LongPath is an optional dependency that when available on
# Windows will be used to support reading files past the 255 char
# path length limit.
if ($ON_WINDOWS) {
    eval "use Win32::LongPath;";
    if (defined $Win32::LongPath::VERSION) {
        $HAVE_Win32_Long_Path = 1;
    }
}
my $config_file = '';
if ( $ENV{'HOME'} ) {
    $config_file = File::Spec->catfile( $ENV{'HOME'}, '.config', 'cloc', 'options.txt');
} elsif ( $ENV{'APPDATA'} and $ON_WINDOWS ) {
    $config_file = File::Spec->catfile( $ENV{'APPDATA'}, 'cloc');
}
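# Illustration only (not in the original source): with HOME=/home/alice the
# default config path above resolves to /home/alice/.config/cloc/options.txt;
# on Windows with APPDATA=C:\Users\alice\AppData\Roaming it resolves to the
# 'cloc' entry directly under that directory.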
# $config_file may be updated by check_alternate_config_files()

my $NN = chr(27) . "[0m"; # normal
$NN = "" if $ON_WINDOWS or !(-t STDOUT); # -t STDOUT: is it a terminal?
my $BB = chr(27) . "[1m"; # bold
$BB = "" if $ON_WINDOWS or !(-t STDOUT);
my $script = basename $0;

# Intended for v1.88:
# --git-diff-simindex Git diff strategy #3: use git's similarity index
# (git diff -M --name-status) to identify file pairs
# to compare. This is especially useful to compare
# files that were renamed between the commits.

my $brief_usage = "
cloc -- Count Lines of Code

Usage:
$script [options] <file(s)/dir(s)/git hash(es)>
Count physical lines of source code and comments in the given files
(may be archives such as compressed tarballs or zip files) and/or
recursively below the given directories or git commit hashes.
Example: cloc src/ include/ main.c

$script [options] --diff <set1> <set2>
Compute differences of physical lines of source code and comments
between any pairwise combination of directory names, archive
files or git commit hashes.
Example: cloc --diff Python-3.5.tar.xz python-3.6/

$script --help shows full documentation on the options.
https://$URL has numerous examples and more information.
";
my $usage = "
Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <report files>

Count, or compute differences of, physical lines of source code in the
given files (may be archives such as compressed tarballs or zip files,
or git commit hashes or branch names) and/or recursively below the
given directories.

${BB}Input Options${NN}
--extract-with=<cmd> This option is only needed if cloc is unable
to figure out how to extract the contents of
the input file(s) by itself.
Use <cmd> to extract binary archive files (e.g.:
.tar.gz, .zip, .Z). Use the literal '>FILE<' as
a stand-in for the actual file(s) to be
extracted. For example, to count lines of code
in the input files
gcc-4.2.tar.gz perl-5.8.8.tar.gz
on Unix use
--extract-with='gzip -dc >FILE< | tar xf -'
or, if you have GNU tar,
--extract-with='tar zxf >FILE<'
and on Windows use, for example:
--extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\"
(if WinZip is installed there).
--list-file=<file> Take the list of file and/or directory names to
process from <file>, which has one file/directory
name per line. Only exact matches are counted;
relative path names will be resolved starting from
the directory where cloc is invoked. Set <file>
to - to read file names from a STDIN pipe.
See also --exclude-list-file, --config.
--diff-list-file=<file> Take the pairs of file names to be diff'ed from
<file>, whose format matches the output of
--diff-alignment. (Run with that option to
see a sample.) The language identifier at the
end of each line is ignored. This enables --diff
mode and bypasses file pair alignment logic.
Use --diff-list-files to define the file name
pairs in separate files. See also --config.
--diff-list-files <file1> <file2>
Compute differences in code and comments between
the files and directories listed in <file1> and
<file2>. Each input file should use the same
format as --list-file, where there is one file or
directory name per line. Only exact matches are
counted; relative path names will be resolved
starting from the directory where cloc is invoked.
This enables --diff mode. See also --list-file,
--diff-list-file, --diff.
--vcs=<VCS> Invoke a system call to <VCS> to obtain a list of
files to work on. If <VCS> is 'git', then will
invoke 'git ls-files' to get a file list and
'git submodule status' to get a list of submodules
whose contents will be ignored. See also --git
which accepts git commit hashes and branch names.
If <VCS> is 'svn' then will invoke 'svn list -R'.
The primary benefit is that cloc will then skip
files explicitly excluded by the versioning tool
in question, i.e., those listed in .gitignore or
carrying the svn:ignore property.
Alternatively <VCS> may be any system command
that generates a list of files.
Note: cloc must be in a directory which can read
the files as they are returned by <VCS>. cloc will
not download files from remote repositories.
'svn list -R' may refer to a remote repository
to obtain file names (and therefore may require
authentication to the remote repository), but
the files themselves must be local.
Setting <VCS> to 'auto' selects between 'git'
and 'svn' (or neither) depending on the presence
of a .git or .svn subdirectory below the directory
where cloc is invoked.
--unicode Check binary files to see if they contain Unicode
expanded ASCII text. This causes performance to
drop noticeably.

${BB}Processing Options${NN}
--autoconf Count .in files (as processed by GNU autoconf) of
recognized languages. See also --no-autogen.
--by-file Report results for every source file encountered.
See also --fmt under 'Output Options'.
--by-file-by-lang Report results for every source file encountered
in addition to reporting by language.
--config <file> Read command line switches from <file> instead of
the default location of $config_file.
The file should contain one switch, along with
arguments (if any), per line. Blank lines and lines
beginning with '#' are skipped. Options given on
the command line take priority over entries read from
the file.
If a directory is also given with any of these
switches: --list-file, --exclude-list-file,
--read-lang-def, --force-lang-def, --diff-list-file
and a config file exists in that directory, it will
take priority over $config_file.
--count-and-diff <set1> <set2>
First perform direct code counts of source file(s)
of <set1> and <set2> separately, then perform a diff
of these. Inputs may be pairs of files, directories,
or archives. If --out or --report-file is given,
three output files will be created, one for each
of the two counts and one for the diff. See also
--diff, --diff-alignment, --diff-timeout,
--ignore-case, --ignore-whitespace.
--diff <set1> <set2> Compute differences in code and comments between
source file(s) of <set1> and <set2>. The inputs
may be any mix of files, directories, archives,
or git commit hashes. Use --diff-alignment to
generate a list showing which file pairs were
compared. When comparing git branches, only files
which have changed in either commit are compared.
See also --git, --count-and-diff, --diff-alignment,
--diff-list-file, --diff-timeout, --ignore-case,
--ignore-whitespace.
--diff-timeout <N> Ignore files which take more than <N> seconds
to process. Default is 10 seconds. Setting <N>
to 0 allows unlimited time. (Large files with many
repeated lines can cause Algorithm::Diff::sdiff()
to take hours.) See also --timeout.
--docstring-as-code cloc considers docstrings to be comments, but this is
not always correct as docstrings represent regular
strings when they appear on the right hand side of an
assignment or as function arguments. This switch
forces docstrings to be counted as code.
--follow-links [Unix only] Follow symbolic links to directories
(sym links to files are always followed).
See also --stat.
--force-lang=<lang>[,<ext>]
Process all files that have a <ext> extension
with the counter for language <lang>. For
example, to count all .f files with the
Fortran 90 counter (which expects files to
end with .f90) instead of the default Fortran 77
counter, use
--force-lang=\"Fortran 90,f\"
If <ext> is omitted, every file will be counted
with the <lang> counter. This option can be
specified multiple times (but that is only
useful when <ext> is given each time).
See also --script-lang, --lang-no-ext.
--force-lang-def=<file> Load language processing filters from <file>,
then use these filters instead of the built-in
filters. Note: languages which map to the same
file extension (for example:
MATLAB/Mathematica/Objective-C/MUMPS/Mercury;
Pascal/PHP; Lisp/OpenCL; Lisp/Julia; Perl/Prolog)
will be ignored as these require additional
processing that is not expressed in language
definition files. Use --read-lang-def to define
new language filters without replacing built-in
filters (see also --write-lang-def,
--write-lang-def-incl-dup, --config).
--git Forces the inputs to be interpreted as git targets
(commit hashes, branch names, et cetera) if these
are not first identified as file or directory
names. This option overrides the --vcs=git logic
if this is given; in other words, --git gets its
list of files to work on directly from git using
the hash or branch name rather than from
'git ls-files'. This option can be used with
--diff to perform line count diffs between git
commits, or between a git commit and a file,
directory, or archive. Use -v/--verbose to see
the git system commands cloc issues.
--git-diff-rel Same as --git --diff, or just --diff if the inputs
are recognized as git targets. Only files which
have changed in either commit are compared.
--git-diff-all Git diff strategy #2: compare all files in the
repository between the two commits.
--ignore-whitespace Ignore horizontal white space when comparing files
with --diff. See also --ignore-case.
--ignore-case Ignore changes in case within file contents;
consider upper- and lowercase letters equivalent
when comparing files with --diff. See also
--ignore-whitespace.
--ignore-case-ext Ignore case of file name extensions. This will
cause problems counting some languages
(specifically, .c and .C are associated with C and
C++; this switch would count .C files as C rather
than C++ on *nix operating systems). File name
case insensitivity is always true on Windows.
--lang-no-ext=<lang> Count files without extensions using the <lang>
counter. This option overrides internal logic
for files without extensions (where such files
are checked against known scripting languages
by examining the first line for #!). See also
--force-lang, --script-lang.
--max-file-size=<MB> Skip files larger than <MB> megabytes when
traversing directories. By default, <MB>=100.
cloc's memory requirement is roughly twenty times
larger than the largest file so running with
files larger than 100 MB on a computer with less
than 2 GB of memory will cause problems.
Note: this check does not apply to files
explicitly passed as command line arguments.
--no-autogen[=list] Ignore files generated by code-production systems
such as GNU autoconf. To see a list of these files
(then exit), run with --no-autogen list
See also --autoconf.
--no-recurse Count files in the given directories without
recursively descending below them.
--original-dir [Only effective in combination with
--strip-comments or --strip-code] Write the stripped
files to the same directory as the original files.
--only-count-files Only count files by language. Blank, comment, and
code counts will be zero.
--read-binary-files Process binary files in addition to text files.
This is usually a bad idea and should only be
attempted with text files that have embedded
binary data.
--read-lang-def=<file> Load new language processing filters from <file>
and merge them with those already known to cloc.
If <file> defines a language cloc already knows
about, cloc's definition will take precedence.
Use --force-lang-def to override cloc's
definitions (see also --write-lang-def,
--write-lang-def-incl-dup, --config).
--script-lang=<lang>,<s> Process all files that invoke <s> as a #!
scripting language with the counter for language
<lang>. For example, files that begin with
#!/usr/local/bin/perl5.8.8
will be counted with the Perl counter by using
--script-lang=Perl,perl5.8.8
The language name is case insensitive but the
name of the script language executable, <s>,
must have the right case. This option can be
specified multiple times. See also --force-lang,
--lang-no-ext.
--sdir=<dir> Use <dir> as the scratch directory instead of
letting File::Temp choose the location. Files
written to this location are not removed at
the end of the run (as they are with File::Temp).
--skip-leading=<N[,ext]> Skip the first <N> lines of each file. If a
comma separated list of extensions is also given,
only skip lines from those file types. Example:
--skip-leading=10,cpp,h
will skip the first ten lines of *.cpp and *.h
files. This is useful for ignoring boilerplate
text.
--skip-uniqueness Skip the file uniqueness check. This will give
a performance boost at the expense of counting
files with identical contents multiple times
(if such duplicates exist).
--stat Some file systems (AFS, CD-ROM, FAT, HPFS, SMB)
do not have directory 'nlink' counts that match
the number of its subdirectories. Consequently
cloc may undercount or completely skip the
contents of such file systems. This switch forces
File::Find to stat directories to obtain the
correct count. File search speed will decrease.
See also --follow-links.
--stdin-name=<file> Give a file name to use to determine the language
for standard input. (Use - as the input name to
receive source code via STDIN.)
--strip-code=<ext> For each file processed, write to the current
directory a version of the file which has blank
and code lines removed, including code lines
that contain in-line comments. The name of each stripped file
is the original file name with .<ext> appended to
it. It is written to the current directory unless
--original-dir is on.
--strip-comments=<ext> For each file processed, write to the current
directory a version of the file which has blank
and commented lines removed (in-line comments
persist). The name of each stripped file is the
original file name with .<ext> appended to it.
It is written to the current directory unless
--original-dir is on.
--strip-str-comments Replace comment markers embedded in strings with
'xx'. This attempts to work around a limitation
in Regexp::Common::Comment where comment markers
embedded in strings are seen as actual comment
markers and not strings, often resulting in a
'Complex regular subexpression recursion limit'
warning and incorrect counts. There are two
disadvantages to using this switch: 1/code count
performance drops, and 2/code generated with
--strip-comments will contain different strings
wherever embedded comments are found.
--sum-reports Input arguments are report files previously
created with the --report-file option in plain
format (e.g., not JSON, YAML, XML, or SQL).
Makes a cumulative set of results containing the
sum of data from the individual report files.
--timeout <N> Ignore files which take more than <N> seconds
to process at any of the language's filter stages.
The default maximum number of seconds spent on a
filter stage is the number of lines in the file
divided by one thousand. Setting <N> to 0 allows
unlimited time. See also --diff-timeout.
--processes=NUM [Available only on systems with a recent version
of the Parallel::ForkManager module. Not
available on Windows.] Sets the maximum number of
cores that cloc uses. The default value of 0
disables multiprocessing.
--unix Override the operating system autodetection
logic and run in UNIX mode. See also
--windows, --show-os.
--use-sloccount If SLOCCount is installed, use its compiled
executables c_count, java_count, pascal_count,
php_count, and xml_count instead of cloc's
counters. SLOCCount's compiled counters are
substantially faster than cloc's and may give
a performance improvement when counting projects
with large files. However, these cloc-specific
features will not be available: --diff,
--count-and-diff, --strip-code, --strip-comments,
--unicode.
--windows Override the operating system autodetection
logic and run in Microsoft Windows mode.
See also --unix, --show-os.

${BB}Filter Options${NN}
--include-content=<regex> Only count files containing text that matches the
given regular expression.
--exclude-content=<regex> Exclude files containing text that matches the given
regular expression.
--exclude-dir=<D1>[,D2,] Exclude the given comma separated directories
D1, D2, D3, et cetera, from being scanned. For
example --exclude-dir=.cache,test will skip
all files and subdirectories that have /.cache/
or /test/ as their parent directory.
Directories named .bzr, .cvs, .hg, .git, .svn,
and .snapshot are always excluded.
This option only works with individual directory
names so including file path separators is not
allowed. Use --fullpath and --not-match-d=<regex>
to supply a regex matching multiple subdirectories.
--exclude-ext=<ext1>[,<ext2>[...]]
Do not count files having the given file name
extensions.
--exclude-lang=<L1>[,L2[...]]
Exclude the given comma separated languages
L1, L2, L3, et cetera, from being counted.
--exclude-list-file=<file> Ignore files and/or directories whose names
appear in <file>. <file> should have one file
name per line. Only exact matches are ignored;
relative path names will be resolved starting from
the directory where cloc is invoked.
See also --list-file, --config.
--fullpath Modifies the behavior of --match-f, --not-match-f,
and --not-match-d to include the file's path--
relative to the directory from which cloc is
invoked--in the regex, not just the file's basename.
(This does not expand each filename to include its
fully qualified absolute path; instead, it uses as
much of the path as is passed in to cloc.)
--include-ext=<ext1>[,ext2[...]]
Count only languages having the given comma
separated file extensions. Use --show-ext to
see the recognized extensions.
--include-lang=<L1>[,L2[...]]
Count only the given comma separated, case-
insensitive languages L1, L2, L3, et cetera. Use
--show-lang to see the list of recognized languages.
--match-d=<regex> Only count files in directories matching the Perl
regex. For example
--match-d='/(src|include)/'
only counts files in directories containing
/src/ or /include/. Unlike --not-match-d,
--match-f, and --not-match-f, --match-d always
anchors the regex to the directory from which
cloc is invoked.
--not-match-d=<regex> Count all files except those in directories
matching the Perl regex. Only the trailing
directory name is compared, for example, when
counting in /usr/local/lib, only 'lib' is
compared to the regex.
Add --fullpath to compare parent directories, beginning
from the directory where cloc is invoked, to the regex.
Do not include file path separators at the beginning
or end of the regex. This option may be repeated.
--match-f=<regex> Only count files whose basenames match the Perl
regex. For example
--match-f='^[Ww]idget'
only counts files that start with Widget or widget.
Add --fullpath to include parent directories
in the regex instead of just the basename.
--not-match-f=<regex> Count all files except those whose basenames
match the Perl regex. Add --fullpath to include
parent directories in the regex instead of just
the basename. This option may be repeated.
--skip-archive=<regex> Ignore files that end with the given Perl regular
expression. For example, if given
--skip-archive='(zip|tar(\.(gz|Z|bz2|xz|7z))?)'
the code will skip files that end with .zip,
.tar, .tar.gz, .tar.Z, .tar.bz2, .tar.xz, and
.tar.7z.
--skip-win-hidden On Windows, ignore hidden files.

${BB}Debug Options${NN}
--categorized=<file> Save file sizes in bytes, identified languages
and names of categorized files to <file>.
--counted=<file> Save names of processed source files to <file>.
--diff-alignment=<file> Write to <file> a list of files and file pairs
showing which files were added, removed, and/or
compared during a run with --diff. This switch
forces the --diff mode on.
--explain=<lang> Print the filters used to remove comments for
language <lang> and exit. In some cases the
filters refer to Perl subroutines rather than
regular expressions. An examination of the
source code may be needed for further explanation.
--help Print this usage information and exit.
--found=<file> Save names of every file found to <file>.
--ignored=<file> Save names of ignored files and the reason they
were ignored to <file>.
--print-filter-stages Print processed source code before and after
each filter is applied.
--show-ext[=<ext>] Print information about all known (or just the
given) file extensions and exit.
--show-lang[=<lang>] Print information about all known (or just the
given) languages and exit.
--show-os Print the value of the operating system mode
and exit. See also --unix, --windows.
-v[=<n>] Verbose switch (optional numeric value).
-verbose[=<n>] Long form of -v.
--version Print the version of this program and exit.
--write-lang-def=<file> Writes to <file> the language processing filters
then exits. Useful as a first step to creating
custom language definitions. Note: languages which
map to the same file extension will be excluded.
(See also --force-lang-def, --read-lang-def).
--write-lang-def-incl-dup=<file>
Same as --write-lang-def, but includes duplicated
extensions. This generates a problematic language
definition file because cloc will refuse to use
it until duplicates are removed.

${BB}Output Options${NN}
--3 Print third-generation language output.
(This option can cause report summation to fail
if some reports were produced with this option
while others were produced without it.)
--by-percent X Instead of comment and blank line counts, show
these values as percentages based on the value
of X in the denominator, where X is
c meaning lines of code
cm meaning lines of code + comments
cb meaning lines of code + blanks
cmb meaning lines of code + comments + blanks
For example, if using method 'c' and your code
has twice as many lines of comments as lines
of code, the value in the comment column will
be 200%. The code column remains a line count.
--csv Write the results as comma separated values.
--csv-delimiter=<C> Use the character <C> as the delimiter for comma
separated files instead of ,. This switch forces --csv to be on.
--file-encoding=<E> Write output files using the <E> encoding instead of
the default ASCII (<E> = 'UTF-7'). Examples: 'UTF-16',
'euc-kr', 'iso-8859-16'. Known encodings can be
printed with
perl -MEncode -e 'print join(\"\\n\", Encode->encodings(\":all\")), \"\\n\"'
--fmt=<N> Alternate text output format where <N> is a number
from 1 to 5, or -1 to -5. 'total lines' means the
sum of code, comment, and blank lines. Negative
values are the same as the positive values but retain,
instead of deleting, the intermediate JSON file that
is written. The JSON file name is randomly generated
unless --out/--report-file is given. The formats are:
1: by language (same as cloc default output)
2: by language with an extra column for total lines
3: by file with language
4: by file with a total lines column
5: by file with language and a total lines column
--hide-rate Do not show elapsed time, line processing rate, or
file processing rates in the output header. This
makes output deterministic.
--json Write the results as JavaScript Object Notation
(JSON) formatted output.
--md Write the results as Markdown-formatted text.
--out=<file> Synonym for --report-file=<file>.
--progress-rate=<n> Show progress update after every <n> files are
processed (default <n>=100). Set <n> to 0 to
suppress progress output (useful when redirecting
output to STDOUT).
--quiet Suppress all information messages except for
the final report.
--report-file=<file> Write the results to <file> instead of STDOUT.
--summary-cutoff=X:N Aggregate to 'Other' results having X lines
below N where X is one of
c meaning lines of code
f meaning files
m meaning lines of comments
cm meaning lines of code + comments
Appending a percent sign to N changes
the calculation from straight count to
percentage.
Ignored with --diff or --by-file.
--sql=<file> Write results as SQL create and insert statements
which can be read by a database program such as
SQLite. If <file> is -, output is sent to STDOUT.
--sql-append Append SQL insert statements to the file specified
by --sql and do not generate table creation
statements. Only valid with the --sql option.
--sql-project=<name> Use <name> as the project identifier for the
current run. Only valid with the --sql option.
--sql-style=<style> Write SQL statements in the given style instead
of the default SQLite format. Styles include
'Oracle' and 'Named_Columns'.
--sum-one For plain text reports, show the SUM: output line
even if only one input file is processed.
--xml Write the results in XML.
--xsl=<file> Reference <file> as an XSL stylesheet within
the XML output. If <file> is 1 (numeric one),
writes a default stylesheet, cloc.xsl (or
cloc-diff.xsl if --diff is also given).
This switch forces --xml on.
--yaml Write the results in YAML.

";
# Help information for options not yet implemented:
# --inline Process comments that appear at the end
# of lines containing code.
# --html Create HTML files of each input file showing
# comment and code lines in different colors.

$| = 1; # flush STDOUT
my $start_time = get_time();
my (
    $opt_categorized,
    $opt_found,
    @opt_force_lang,
    $opt_lang_no_ext,
    @opt_script_lang,
    $opt_count_diff,
    $opt_diff,
    $opt_diff_alignment,
    $opt_diff_list_file,
    $opt_diff_list_files,
    $opt_diff_timeout,
    $opt_timeout,
    $opt_html,
    $opt_ignored,
    $opt_counted,
    $opt_show_ext,
    $opt_show_lang,
    $opt_progress_rate,
    $opt_print_filter_stages,
    $opt_v,
    $opt_vcs,
    $opt_version,
    $opt_include_content,
    $opt_exclude_content,
    $opt_exclude_lang,
    $opt_exclude_list_file,
    $opt_exclude_dir,
    $opt_explain,
    $opt_include_ext,
    $opt_include_lang,
    $opt_force_lang_def,
    $opt_read_lang_def,
    $opt_write_lang_def,
    $opt_write_lang_def_incl_dup,
    $opt_strip_code,
    $opt_strip_comments,
    $opt_original_dir,
    $opt_quiet,
    $opt_report_file,
    $opt_sdir,
    $opt_sum_reports,
    $opt_hide_rate,
    $opt_processes,
    $opt_unicode,
    $opt_no3, # accept it but don't use it
    $opt_3,
    $opt_extract_with,
    $opt_by_file,
    $opt_by_file_by_lang,
    $opt_by_percent,
    $opt_xml,
    $opt_xsl,
    $opt_yaml,
    $opt_csv,
    $opt_csv_delimiter,
    $opt_fullpath,
    $opt_json,
    $opt_md,
    $opt_match_f,
    @opt_not_match_f,
    $opt_match_d,
    @opt_not_match_d,
    $opt_skip_uniqueness,
    $opt_list_file,
    $opt_help,
    $opt_skip_win_hidden,
    $opt_read_binary_files,
    $opt_sql,
    $opt_sql_append,
    $opt_sql_project,
    $opt_sql_style,
    $opt_inline,
    $opt_exclude_ext,
    $opt_ignore_whitespace,
    $opt_ignore_case,
    $opt_ignore_case_ext,
    $opt_follow_links,
    $opt_autoconf,
    $opt_sum_one,
    $opt_stdin_name,
    $opt_force_on_windows,
    $opt_force_on_unix, # actually forces !$ON_WINDOWS
    $opt_show_os,
    $opt_skip_archive,
    $opt_max_file_size, # in MB
    $opt_use_sloccount,
    $opt_no_autogen,
    $opt_force_git,
    $opt_git_diff_rel,
    $opt_git_diff_all,
    $opt_git_diff_simindex,
    $opt_config_file,
    $opt_strip_str_comments,
    $opt_file_encoding,
    $opt_docstring_as_code,
    $opt_stat,
    $opt_summary_cutoff,
    $opt_skip_leading,
    $opt_no_recurse,
    $opt_only_count_files,
    $opt_fmt,
   );

my $getopt_success = GetOptions( # {{{1
    "by_file|by-file" => \$opt_by_file,
    "by_file_by_lang|by-file-by-lang" => \$opt_by_file_by_lang,
    "categorized=s" => \$opt_categorized,
    "counted=s" => \$opt_counted,
    "include_ext|include-ext=s" => \$opt_include_ext,
    "include_lang|include-lang=s" => \$opt_include_lang,
    "include_content|include-content=s" => \$opt_include_content,
    "exclude_content|exclude-content=s" => \$opt_exclude_content,
    "exclude_lang|exclude-lang=s" => \$opt_exclude_lang,
    "exclude_dir|exclude-dir=s" => \$opt_exclude_dir,
    "exclude_list_file|exclude-list-file=s" => \$opt_exclude_list_file,
    "explain=s" => \$opt_explain,
    "extract_with|extract-with=s" => \$opt_extract_with,
    "found=s" => \$opt_found,
    "count_and_diff|count-and-diff" => \$opt_count_diff,
    "diff" => \$opt_diff,
    "diff-alignment|diff_alignment=s" => \$opt_diff_alignment,
    "diff-timeout|diff_timeout=i" => \$opt_diff_timeout,
    "diff-list-file|diff_list_file=s" => \$opt_diff_list_file,
    "diff-list-files|diff_list_files" => \$opt_diff_list_files,
    "timeout=i" => \$opt_timeout,
    "html" => \$opt_html,
    "ignored=s" => \$opt_ignored,
    "quiet" => \$opt_quiet,
    "force_lang_def|force-lang-def=s" => \$opt_force_lang_def,
    "read_lang_def|read-lang-def=s" => \$opt_read_lang_def,
    "show_ext|show-ext:s" => \$opt_show_ext,
    "show_lang|show-lang:s" => \$opt_show_lang,
    "progress_rate|progress-rate=i" => \$opt_progress_rate,
    "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages,
    "report_file|report-file=s" => \$opt_report_file,
    "out=s" => \$opt_report_file,
    "script_lang|script-lang=s" => \@opt_script_lang,
    "sdir=s" => \$opt_sdir,
    "skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness,
    "strip_code|strip-code=s" => \$opt_strip_code,
    "strip_comments|strip-comments=s" => \$opt_strip_comments,
    "original_dir|original-dir" => \$opt_original_dir,
    "sum_reports|sum-reports" => \$opt_sum_reports,
    "hide_rate|hide-rate" => \$opt_hide_rate,
    "processes=n" => \$opt_processes,
    "unicode" => \$opt_unicode,
    "no3" => \$opt_no3, # ignored
    "3" => \$opt_3,
    "v|verbose:i" => \$opt_v,
    "vcs=s" => \$opt_vcs,
    "version" => \$opt_version,
    "write_lang_def|write-lang-def=s" => \$opt_write_lang_def,
    "write_lang_def_incl_dup|write-lang-def-incl-dup=s" => \$opt_write_lang_def_incl_dup,
    "xml" => \$opt_xml,
    "xsl=s" => \$opt_xsl,
    "force_lang|force-lang=s" => \@opt_force_lang,
    "lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext,
    "yaml" => \$opt_yaml,
    "csv" => \$opt_csv,
    "csv_delimiter|csv-delimiter=s" => \$opt_csv_delimiter,
    "json" => \$opt_json,
    "md" => \$opt_md,
    "fullpath" => \$opt_fullpath,
    "match_f|match-f=s" => \$opt_match_f,
    "not_match_f|not-match-f=s" => \@opt_not_match_f,
    "match_d|match-d=s" => \$opt_match_d,
    "not_match_d|not-match-d=s" => \@opt_not_match_d,
    "list_file|list-file=s" => \$opt_list_file,
    "help" => \$opt_help,
    "skip_win_hidden|skip-win-hidden" => \$opt_skip_win_hidden,
    "read_binary_files|read-binary-files" => \$opt_read_binary_files,
    "sql=s" => \$opt_sql,
    "sql_project|sql-project=s" => \$opt_sql_project,
    "sql_append|sql-append" => \$opt_sql_append,
    "sql_style|sql-style=s" => \$opt_sql_style,
    "inline" => \$opt_inline,
    "exclude_ext|exclude-ext=s" => \$opt_exclude_ext,
    "ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace,
    "ignore_case|ignore-case" => \$opt_ignore_case,
    "ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext,
    "follow_links|follow-links" => \$opt_follow_links,
    "autoconf" => \$opt_autoconf,
    "sum_one|sum-one" => \$opt_sum_one,
    "by_percent|by-percent=s" => \$opt_by_percent,
    "stdin_name|stdin-name=s" => \$opt_stdin_name,
    "windows" => \$opt_force_on_windows,
    "unix" => \$opt_force_on_unix,
    "show_os|show-os" => \$opt_show_os,
    "skip_archive|skip-archive=s" => \$opt_skip_archive,
    "max_file_size|max-file-size=f" => \$opt_max_file_size,
    "use_sloccount|use-sloccount" => \$opt_use_sloccount,
    "no_autogen|no-autogen" => \$opt_no_autogen,
    "git" => \$opt_force_git,
    "git_diff_rel|git-diff-rel" => \$opt_git_diff_rel,
    "git_diff_all|git-diff-all" => \$opt_git_diff_all,
#   "git_diff_simindex|git-diff-simindex" => \$opt_git_diff_simindex,
    "config=s" => \$opt_config_file,
    "strip_str_comments|strip-str-comments" => \$opt_strip_str_comments,
    "file_encoding|file-encoding=s" => \$opt_file_encoding,
    "docstring_as_code|docstring-as-code" => \$opt_docstring_as_code,
    "stat" => \$opt_stat,
    "summary_cutoff|summary-cutoff=s" => \$opt_summary_cutoff,
    "skip_leading|skip-leading:s" => \$opt_skip_leading,
    "no_recurse|no-recurse" => \$opt_no_recurse,
    "only_count_files|only-count-files" => \$opt_only_count_files,
    "fmt=i" => \$opt_fmt,
   );
# 1}}}
$config_file = $opt_config_file if defined $opt_config_file;
load_from_config_file($config_file, # {{{2
    \$opt_by_file,
    \$opt_by_file_by_lang,
    \$opt_categorized,
    \$opt_counted,
    \$opt_include_ext,
    \$opt_include_lang,
    \$opt_include_content,
    \$opt_exclude_content,
    \$opt_exclude_lang,
    \$opt_exclude_dir,
    \$opt_exclude_list_file,
    \$opt_explain,
    \$opt_extract_with,
    \$opt_found,
    \$opt_count_diff,
    \$opt_diff_list_files,
    \$opt_diff,
    \$opt_diff_alignment,
    \$opt_diff_timeout,
    \$opt_timeout,
    \$opt_html,
    \$opt_ignored,
    \$opt_quiet,
    \$opt_force_lang_def,
    \$opt_read_lang_def,
    \$opt_show_ext,
    \$opt_show_lang,
    \$opt_progress_rate,
    \$opt_print_filter_stages,
    \$opt_report_file,
    \@opt_script_lang,
    \$opt_sdir,
    \$opt_skip_uniqueness,
    \$opt_strip_code,
    \$opt_strip_comments,
    \$opt_original_dir,
    \$opt_sum_reports,
    \$opt_hide_rate,
    \$opt_processes,
    \$opt_unicode,
    \$opt_3,
    \$opt_v,
    \$opt_vcs,
    \$opt_version,
    \$opt_write_lang_def,
    \$opt_write_lang_def_incl_dup,
    \$opt_xml,
    \$opt_xsl,
    \@opt_force_lang,
    \$opt_lang_no_ext,
    \$opt_yaml,
    \$opt_csv,
    \$opt_csv_delimiter,
    \$opt_json,
    \$opt_md,
    \$opt_fullpath,
    \$opt_match_f,
    \@opt_not_match_f,
    \$opt_match_d,
    \@opt_not_match_d,
    \$opt_list_file,
    \$opt_help,
    \$opt_skip_win_hidden,
    \$opt_read_binary_files,
    \$opt_sql,
    \$opt_sql_project,
    \$opt_sql_append,
    \$opt_sql_style,
    \$opt_inline,
    \$opt_exclude_ext,
    \$opt_ignore_whitespace,
    \$opt_ignore_case,
    \$opt_ignore_case_ext,
    \$opt_follow_links,
    \$opt_autoconf,
    \$opt_sum_one,
    \$opt_by_percent,
    \$opt_stdin_name,
    \$opt_force_on_windows,
    \$opt_force_on_unix,
    \$opt_show_os,
    \$opt_skip_archive,
    \$opt_max_file_size,
    \$opt_use_sloccount,
    \$opt_no_autogen,
    \$opt_force_git,
    \$opt_strip_str_comments,
    \$opt_file_encoding,
    \$opt_docstring_as_code,
    \$opt_stat,
    ); # 2}}} Not pretty. Not at all.
if ($opt_version) {
    printf "$VERSION\n";
    exit;
}
my $opt_git = 0;
$opt_git = 1 if defined($opt_git_diff_all) or
                defined($opt_git_diff_rel) or
                (defined($opt_vcs) and ($opt_vcs eq "git"));
$opt_by_file = 1 if defined $opt_by_file_by_lang;
$opt_fmt = 0 unless defined $opt_fmt;
if ($opt_fmt) {
    $opt_by_file = 1;
    $opt_json = 1;
}
my $CLOC_XSL = "cloc.xsl"; # created with --xsl
$CLOC_XSL = "cloc-diff.xsl" if $opt_diff;
die "\n" unless $getopt_success;
print $usage and exit if $opt_help;
my %Exclude_Language = ();
%Exclude_Language = map { $_ => 1 } split(/,/, $opt_exclude_lang)
    if $opt_exclude_lang;
my %Exclude_Dir = ();
%Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir )
    if $opt_exclude_dir ;
die unless exclude_dir_validates(\%Exclude_Dir);
my %Include_Ext = ();
%Include_Ext = map { $_ => 1 } split(/,/, $opt_include_ext)
    if $opt_include_ext;
my %Include_Language = (); # keys are lower case language names
%Include_Language = map { lc($_) => 1 } split(/,/, $opt_include_lang)
    if $opt_include_lang;
# Forcibly exclude .svn, .cvs, .hg, .git, .bzr directories. The contents of these
# directories often conflict with files of interest.
$opt_exclude_dir = 1;
$Exclude_Dir{".svn"} = 1;
$Exclude_Dir{".cvs"} = 1;
$Exclude_Dir{".hg"} = 1;
$Exclude_Dir{".git"} = 1;
$Exclude_Dir{".bzr"} = 1;
$Exclude_Dir{".snapshot"} = 1; # NetApp backups
$Exclude_Dir{".config"} = 1;
$opt_count_diff = defined $opt_count_diff ? 1 : 0;
$opt_diff = 1 if $opt_diff_alignment or
                 $opt_diff_list_file or
                 $opt_diff_list_files or
                 $opt_git_diff_rel or
                 $opt_git_diff_all or
                 $opt_git_diff_simindex;
$opt_force_git = 1 if $opt_git_diff_rel or
                      $opt_git_diff_all or
                      $opt_git_diff_simindex;
$opt_diff_alignment = 0 if $opt_diff_list_file;
$opt_exclude_ext = "" unless $opt_exclude_ext;
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
$opt_ignore_case = 0 unless $opt_ignore_case;
$opt_ignore_case_ext = 0 unless $opt_ignore_case_ext;
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
$opt_follow_links = 0 unless $opt_follow_links;
if (defined $opt_diff_timeout) {
    # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
    $opt_diff_timeout = 2**31-1 unless $opt_diff_timeout > 0;
} else {
    $opt_diff_timeout = 10; # seconds
}
if (defined $opt_timeout) {
    # if defined but with a value of <= 0, set to 2^31-1 seconds = 68 years
    $opt_timeout = 2**31-1 unless $opt_timeout > 0;
    # else is computed dynamically, ref $max_duration_sec
}
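# Worked example of the dynamic default described in the --timeout help text
# (illustration only, not in the original source): a 5,000-line file gets
# roughly 5000/1000 = 5 seconds per filter stage unless --timeout overrides it.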
$opt_csv = 0 unless defined $opt_csv;
$opt_csv = 1 if $opt_csv_delimiter;
$ON_WINDOWS = 1 if $opt_force_on_windows;
$ON_WINDOWS = 0 if $opt_force_on_unix;
$opt_max_file_size = 100 unless $opt_max_file_size;
my $HAVE_SLOCCOUNT_c_count = 0;
if (!$ON_WINDOWS and $opt_use_sloccount) {
    # Only bother doing this kludgey test if the user explicitly wants
    # to use SLOCCount. Debian based systems will hang if just doing
    # external_utility_exists("c_count")
    # if c_count is in $PATH; c_count expects to have input.
    $HAVE_SLOCCOUNT_c_count = external_utility_exists("c_count /bin/sh");
}
if ($opt_use_sloccount) {
    if (!$HAVE_SLOCCOUNT_c_count) {
        warn "c_count could not be found; ignoring --use-sloccount\n";
        $opt_use_sloccount = 0;
    } else {
        warn "Using c_count, php_count, xml_count, pascal_count from SLOCCount\n";
        warn "--diff is disabled with --use-sloccount\n" if $opt_diff;
        warn "--count-and-diff is disabled with --use-sloccount\n" if $opt_count_diff;
        warn "--unicode is disabled with --use-sloccount\n" if $opt_unicode;
        warn "--strip-comments is disabled with --use-sloccount\n" if $opt_strip_comments;
        warn "--strip-code is disabled with --use-sloccount\n" if $opt_strip_code;
        $opt_diff = 0;
        $opt_count_diff = undef;
        $opt_unicode = 0;
        $opt_strip_comments = 0;
        $opt_strip_code = 0;
    }
}
die "--strip-comments and --strip-code are mutually exclusive\n" if
    $opt_strip_comments and $opt_strip_code;
$opt_vcs = 0 if $opt_force_git;

# replace Windows path separators with /
if ($ON_WINDOWS) {
    map { s{\\}{/}g } @ARGV;
    if ($opt_git) {
        # PowerShell tab expansion automatically prefixes local directories
        # with ".\" (now mapped to "./"). git ls-files output does not
        # include this. Strip this prefix to permit clean matches.
        map { s{^\./}{} } @ARGV;
    }
}
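# Illustration only (not in the original source): a Windows argument given as
# src\lib\core becomes src/lib/core here; with --git, a PowerShell-completed
# .\src first becomes ./src and then plain src.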

my @COUNT_DIFF_ARGV = undef;
my $COUNT_DIFF_report_file = undef;
if ($opt_count_diff and !$opt_diff_list_file) {
    die "--count-and-diff requires two arguments; got ", scalar @ARGV, "\n"
        if scalar @ARGV != 2;
    # prefix with a dummy term so that $opt_count_diff is the
    # index into @COUNT_DIFF_ARGV to work on at each pass
    @COUNT_DIFF_ARGV = (undef, $ARGV[0],
                               $ARGV[1],
                               [$ARGV[0], $ARGV[1]]); # 3rd pass: diff them
    $COUNT_DIFF_report_file = $opt_report_file if $opt_report_file;
}
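# Sketch of the three --count-and-diff passes driven by this array (for
# illustration; not in the original source): pass 1 counts $ARGV[0] alone,
# pass 2 counts $ARGV[1] alone, and pass 3 receives the [set1, set2] pair
# and runs the diff, with $opt_count_diff used as the index at the top of
# the processing loop below.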

# Options defaults:
$opt_quiet = 1 if ($opt_md or $opt_json or !(-t STDOUT))
    and !defined $opt_report_file;
$opt_progress_rate = 100 unless defined $opt_progress_rate;
$opt_progress_rate = 0 if defined $opt_quiet;
if (!defined $opt_v) {
    $opt_v = 0;
} elsif (!$opt_v) {
    $opt_v = 1;
}
if (defined $opt_xsl) {
    $opt_xsl = $CLOC_XSL if $opt_xsl eq "1";
    $opt_xml = 1;
}
my $skip_generate_report = 0;
$opt_sql_style = 0 unless defined $opt_sql_style;
$opt_sql = 0 unless $opt_sql_style or defined $opt_sql;
if ($opt_sql eq "-" || $opt_sql eq "1") { # stream SQL output to STDOUT
    $opt_quiet = 1;
    $skip_generate_report = 1;
    $opt_by_file = 1;
    $opt_sum_reports = 0;
    $opt_progress_rate = 0;
} elsif ($opt_sql) { # write SQL output to a file
    $opt_by_file = 1;
    $skip_generate_report = 1;
    $opt_sum_reports = 0;
}
if ($opt_sql_style) {
    $opt_sql_style = lc $opt_sql_style;
    if (!grep { lc $_ eq $opt_sql_style } qw ( Oracle Named_Columns )) {
        die "'$opt_sql_style' is not a recognized SQL style.\n";
    }
}
$opt_by_percent = '' unless defined $opt_by_percent;
if ($opt_by_percent and $opt_by_percent !~ m/^(c|cm|cb|cmb)$/i) {
    die "--by-percent must be either 'c', 'cm', 'cb', or 'cmb'\n";
}
$opt_by_percent = lc $opt_by_percent;

if (defined $opt_vcs) {
    if ($opt_vcs eq "auto") {
        if (is_dir(".git")) {
            $opt_vcs = "git";
        } elsif (is_dir(".svn")) {
            $opt_vcs = "svn";
        } else {
            warn "--vcs auto: unable to determine versioning system\n";
        }
    }
    if ($opt_vcs eq "git") {
        $opt_vcs = "git ls-files";
        my @submodules = invoke_generator('git submodule status', \@ARGV);
        foreach my $SM (@submodules) {
            $SM =~ s/^\s+//; # may have leading space
            $SM =~ s/\(\S+\)\s*$//; # may end with something like (heads/master)
            my ($checksum, $dir) = split(' ', $SM, 2);
            $dir =~ s/\s+$//;
            $Exclude_Dir{$dir} = 1;
        }
    } elsif ($opt_vcs eq "svn") {
        $opt_vcs = "svn list -R";
    }
}
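# For illustration (assumed output format, not from the original source): a
# typical 'git submodule status' line resembles
#   " a94bef3... vendor/some-lib (heads/main)"
# and the loop above trims the leading whitespace and trailing "(...)", splits
# off the checksum, and records vendor/some-lib in %Exclude_Dir.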

my $list_no_autogen = 0;
if (defined $opt_no_autogen and scalar @ARGV == 1 and $ARGV[0] eq "list") {
    $list_no_autogen = 1;
}
if ($opt_summary_cutoff) {
    my $error = summary_cutoff_error($opt_summary_cutoff);
    die "$error\n" if $error;
}

if (!$opt_config_file) {
    # if not explicitly given, look for a config file in other
    # possible locations
    my $other_loc = check_alternate_config_files($opt_list_file,
        $opt_exclude_list_file, $opt_read_lang_def, $opt_force_lang_def,
        $opt_diff_list_file);
    $opt_config_file = $other_loc if $other_loc;
}

# --match-d and --not-match-d: if end with a trailing slash, update the
# regex to be either slash or end of line since File::Find::find() will
# not see the trailing slash in leaf directories (#732, #833).
if ($opt_match_d and $opt_match_d =~ m{/$}) {
    $opt_match_d =~ s{/$}{(/|\$)};
}
foreach my $nmd (@opt_not_match_d) {
    if ($nmd =~ m{/$}) {
        $nmd =~ s{/$}{(/|\$)};
    }
}

die $brief_usage unless defined $opt_version or
                        defined $opt_show_lang or
                        defined $opt_show_ext or
                        defined $opt_show_os or
                        defined $opt_write_lang_def or
                        defined $opt_write_lang_def_incl_dup or
                        defined $opt_list_file or
                        defined $opt_diff_list_file or
                        defined $opt_vcs or
                        defined $opt_xsl or
                        defined $opt_explain or
                        $list_no_autogen or
                        scalar @ARGV >= 1;
if (!$opt_diff_list_file) {
    die "--diff requires two arguments; got ", scalar @ARGV, "\n"
        if $opt_diff and !$opt_sum_reports and scalar @ARGV != 2;
    die "--diff arguments are identical; nothing done", "\n"
        if $opt_diff and !$opt_sum_reports and scalar @ARGV == 2
            and $ARGV[0] eq $ARGV[1];
}
trick_pp_packer_encode() if $ON_WINDOWS and $opt_file_encoding;
$File::Find::dont_use_nlink = 1 if $opt_stat or top_level_SMB_dir(\@ARGV);
my @git_similarity = (); # only populated with --git-diff-simindex
my %git_metadata = ();
get_git_metadata(\@ARGV, \%git_metadata) if $opt_force_git;
#use Data::Dumper;
#print Dumper(\%git_metadata);
replace_git_hash_with_tarfile(\@ARGV, \@git_similarity);
# 1}}}
# Step 1: Initialize global constants. {{{1
|
|
#
|
|
my $nFiles_Found = 0; # updated in make_file_list
|
|
my (%Language_by_Extension, %Language_by_Script,
|
|
%Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename,
|
|
%Language_by_File, %Scale_Factor, %Known_Binary_Archives,
|
|
%Language_by_Prefix, %EOL_Continuation_re,
|
|
);
|
|
my $ALREADY_SHOWED_HEADER = 0;
|
|
my $ALREADY_SHOWED_XML_SECTION = 0;
|
|
my %Error_Codes = ( 'Unable to read' => -1,
|
|
'Neither file nor directory' => -2,
|
|
'Diff error (quoted comments?)' => -3,
|
|
'Diff error, exceeded timeout' => -4,
|
|
'Line count, exceeded timeout' => -5,
|
|
);
|
|
my %Extension_Collision = (
|
|
'ADSO/IDSM' => [ 'adso' ] ,
|
|
'C#/Smalltalk' => [ 'cs' ] ,
|
|
'D/dtrace' => [ 'd' ] ,
|
|
'F#/Forth' => [ 'fs' ] ,
|
|
'Fortran 77/Forth' => [ 'f', 'for' ] ,
|
|
'IDL/Qt Project/Prolog/ProGuard' => [ 'pro' ] ,
|
|
'Lisp/Julia' => [ 'jl' ] ,
|
|
'Lisp/OpenCL' => [ 'cl' ] ,
|
|
'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => [ 'm' ] ,
|
|
'Pascal/Puppet' => [ 'pp' ] ,
|
|
'Perl/Prolog' => [ 'pl', 'PL' ] ,
|
|
'PHP/Pascal/Fortran' => [ 'inc' ] ,
|
|
'Raku/Prolog' => [ 'p6', 'P6' ] ,
|
|
'Qt/Glade' => [ 'ui' ] ,
|
|
'TypeScript/Qt Linguist' => [ 'ts' ] ,
|
|
'Verilog-SystemVerilog/Coq' => [ 'v' ] ,
|
|
'Visual Basic/TeX/Apex Class' => [ 'cls' ] ,
|
|
'Scheme/SaltStack' => [ 'sls' ] ,
|
|
);
|
|
my @Autogen_to_ignore = no_autogen_files($list_no_autogen);
|
|
if ($opt_force_lang_def) {
|
|
# replace cloc's definitions
|
|
read_lang_def(
|
|
$opt_force_lang_def , # Sample values:
|
|
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
# [ 'remove_matches' , '^\s*#' ]
|
|
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
);
|
|
} else {
|
|
set_constants( #
|
|
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
\%Language_by_Prefix , # Language_by_Prefix{Dockerfile} = 'Dockerfile'
|
|
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
# [ 'remove_matches' , '^\s*#' ]
|
|
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
\%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1
|
|
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
);
|
|
if ($opt_no_autogen) {
|
|
foreach my $F (@Autogen_to_ignore) { $Not_Code_Filename{ $F } = 1; }
|
|
}
|
|
}
|
|
if ($opt_read_lang_def) {
|
|
# augment cloc's definitions (keep cloc's where there are overlaps)
|
|
merge_lang_def(
|
|
$opt_read_lang_def , # Sample values:
|
|
\%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77'
|
|
\%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell'
|
|
\%Language_by_File , # Language_by_File{makefile} = 'make'
|
|
\%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] =
|
|
# [ 'remove_matches' , '^\s*#' ]
|
|
\%Not_Code_Extension , # Not_Code_Extension{jpg} = 1
|
|
\%Not_Code_Filename , # Not_Code_Filename{README} = 1
|
|
\%Scale_Factor , # Scale_Factor{Perl} = 4.0
|
|
\%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$'
|
|
);
|
|
}
|
|
if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
|
|
die_unknown_lang($opt_lang_no_ext, "--lang-no-ext")
|
|
}
|
|
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
|
|
\%Scale_Factor);
|
|
|
|
my $nCounted = 0;
|
|
|
|
# Process command line provided extension-to-language mapping overrides.
|
|
# Make a hash of known languages in lower case for easier matching.
|
|
my %Recognized_Language_lc = (); # key = language name in lc, value = true name
|
|
foreach my $language (keys %Filters_by_Language) {
|
|
my $lang_lc = lc $language;
|
|
$Recognized_Language_lc{$lang_lc} = $language;
|
|
}
|
|
my %Forced_Extension = (); # file name extensions which user wants to count
|
|
my $All_One_Language = 0; # set to !0 if --force-lang's <ext> is missing
|
|
foreach my $pair (@opt_force_lang) {
|
|
my ($lang, $extension) = split(',', $pair);
|
|
my $lang_lc = lc $lang;
|
|
if (defined $extension) {
|
|
$Forced_Extension{$extension} = $lang;
|
|
|
|
die_unknown_lang($lang, "--force-lang")
|
|
unless $Recognized_Language_lc{$lang_lc};
|
|
|
|
$Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc};
|
|
} else {
|
|
# the scary case--count everything as this language
|
|
$All_One_Language = $Recognized_Language_lc{$lang_lc};
|
|
}
|
|
}
|
|
|
|
foreach my $pair (@opt_script_lang) {
|
|
my ($lang, $script_name) = split(',', $pair);
|
|
my $lang_lc = lc $lang;
|
|
if (!defined $script_name) {
|
|
die "The --script-lang option requires a comma separated pair of ".
|
|
"strings.\n";
|
|
}
|
|
|
|
die_unknown_lang($lang, "--script-lang")
|
|
unless $Recognized_Language_lc{$lang_lc};
|
|
|
|
$Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc};
|
|
}
|
|
|
|
# If user provided a language definition file, make sure those
|
|
# extensions aren't rejected.
|
|
foreach my $ext (keys %Language_by_Extension) {
    next unless defined $Not_Code_Extension{$ext};
    delete $Not_Code_Extension{$ext};
}
|
|
|
|
# If user provided file extensions to ignore, add these to
|
|
# the exclusion list.
|
|
foreach my $ext (split(/,/, $opt_exclude_ext)) {
    $ext = lc $ext if $ON_WINDOWS or $opt_ignore_case_ext;
    $Not_Code_Extension{$ext} = 1;
}
|
|
|
|
# If SQL or --by-file output is requested, keep track of directory names
|
|
# generated by File::Temp::tempdir and used to temporarily hold the results
|
|
# of compressed archives. Contents of the SQL table 't' will be much
|
|
# cleaner if these meaningless directory names are stripped from the front
|
|
# of files pulled from the archives.
|
|
my %TEMP_DIR = ();
|
|
my $TEMP_OFF = 0; # Needed for --sdir; keep track of the number of
|
|
# scratch directories made in this run to avoid
|
|
# file overwrites by multiple extractions to same
|
|
# sdir.
|
|
# Also track locations where temporary installations, if necessary, of
|
|
# Algorithm::Diff and/or Regexp::Common are done. Make sure these
|
|
# directories are not counted as inputs (ref bug #80 2012-11-23).
|
|
my %TEMP_INST = ();
|
|
|
|
# invert %Language_by_Script hash to get an easy-to-look-up list of known
|
|
# scripting languages
|
|
my %Script_Language = map { $_ => 1 } values %Language_by_Script ;
|
|
# 1}}}
|
|
# Step 2: Early exits for display, summation. {{{1
|
|
#
|
|
print_extension_info( $opt_show_ext ) if defined $opt_show_ext ;
|
|
print_language_info( $opt_show_lang, '') if defined $opt_show_lang;
|
|
print_language_filters( $opt_explain ) if defined $opt_explain ;
|
|
exit if (defined $opt_show_ext) or
|
|
(defined $opt_show_lang) or
|
|
(defined $opt_explain) or
|
|
$list_no_autogen;
|
|
|
|
Top_of_Processing_Loop:
|
|
# Sorry, coding purists. Using a goto to implement --count-and-diff
|
|
# which has to do three passes over the main code, starting with
|
|
# a clean slate each time.
|
|
if ($opt_count_diff) {
|
|
@ARGV = ( $COUNT_DIFF_ARGV[ $opt_count_diff ] );
|
|
if ($opt_count_diff == 3) {
|
|
$opt_diff = 1;
|
|
@ARGV = @{$COUNT_DIFF_ARGV[ $opt_count_diff ]}; # last arg is list of list
|
|
} elsif ($opt_diff_list_files) {
|
|
$opt_diff = 0;
|
|
}
|
|
if ($opt_report_file) {
|
|
# Instead of just one output file, will have three.
|
|
# Keep their names unique otherwise results are clobbered.
|
|
# Replace file path separators with underscores otherwise
|
|
# may end up with illegal file names.
|
|
my ($fn_0, $fn_1) = (undef, undef);
|
|
if ($ON_WINDOWS) {
|
|
($fn_0 = $ARGV[0]) =~ s{\\}{_}g;
|
|
$fn_0 =~ s{:}{_}g;
|
|
$fn_0 =~ s{/}{_}g;
|
|
($fn_1 = $ARGV[1]) =~ s{\\}{_}g if defined $ARGV[1];
|
|
$fn_1 =~ s{:}{_}g if defined $ARGV[1];
|
|
$fn_1 =~ s{/}{_}g if defined $ARGV[1];
|
|
} else {
|
|
($fn_0 = $ARGV[0]) =~ s{/}{_}g;
|
|
($fn_1 = $ARGV[1]) =~ s{/}{_}g if defined $ARGV[1];
|
|
}
|
|
|
|
if ($opt_count_diff == 3) {
|
|
$opt_report_file = $COUNT_DIFF_report_file . ".diff.$fn_0.$fn_1";
|
|
} else {
|
|
$opt_report_file = $COUNT_DIFF_report_file . ".$fn_0";
|
|
}
|
|
} else {
|
|
# STDOUT; print a header showing what it's working on
|
|
if ($opt_count_diff == 3) {
|
|
print "\ndiff $ARGV[0] $ARGV[1]::\n";
|
|
} else {
|
|
print "\n" if $opt_count_diff > 1;
|
|
print "$ARGV[0]::\n";
|
|
}
|
|
}
|
|
$ALREADY_SHOWED_HEADER = 0;
|
|
$ALREADY_SHOWED_XML_SECTION = 0;
|
|
}
|
|
|
|
#print "Before glob have [", join(",", @ARGV), "]\n";
|
|
@ARGV = windows_glob(@ARGV) if $ON_WINDOWS;
|
|
#print "after glob have [", join(",", @ARGV), "]\n";
|
|
|
|
# filter out archive files if requested to do so
|
|
if (defined $opt_skip_archive) {
|
|
my @non_archive = ();
|
|
foreach my $candidate (@ARGV) {
|
|
if ($candidate !~ m/${opt_skip_archive}$/) {
|
|
push @non_archive, $candidate;
|
|
|
|
}
|
|
}
|
|
@ARGV = @non_archive;
|
|
}
|
|
|
|
if ($opt_sum_reports and $opt_diff) {
|
|
my @results = ();
|
|
if ($opt_csv and !defined($opt_csv_delimiter)) {
|
|
$opt_csv_delimiter = ",";
|
|
}
|
|
if ($opt_list_file) { # read inputs from the list file
|
|
my @list = read_list_file($opt_list_file);
|
|
if ($opt_csv) {
|
|
@results = combine_csv_diffs($opt_csv_delimiter, \@list);
|
|
} else {
|
|
@results = combine_diffs(\@list);
|
|
}
|
|
} elsif ($opt_vcs) { # read inputs from the VCS generator
|
|
my @list = invoke_generator($opt_vcs, \@ARGV);
|
|
if ($opt_csv) {
|
|
@results = combine_csv_diffs($opt_csv_delimiter, \@list);
|
|
} else {
|
|
@results = combine_diffs(\@list);
|
|
}
|
|
} else { # get inputs from the command line
|
|
if ($opt_csv) {
|
|
@results = combine_csv_diffs($opt_csv_delimiter, \@ARGV);
|
|
} else {
|
|
@results = combine_diffs(\@ARGV);
|
|
}
|
|
}
|
|
if ($opt_report_file) {
|
|
write_file($opt_report_file, {}, @results);
|
|
} else {
|
|
print "\n", join("\n", @results), "\n";
|
|
}
|
|
exit;
|
|
}
|
|
if ($opt_sum_reports) {
|
|
my %Results = ();
|
|
foreach my $type( "by language", "by report file" ) {
|
|
my $found_lang = undef;
|
|
if ($opt_list_file or $opt_vcs) {
|
|
# read inputs from the list file
|
|
my @list;
|
|
if ($opt_vcs) {
|
|
@list = invoke_generator($opt_vcs, \@ARGV);
|
|
} else {
|
|
@list = read_list_file($opt_list_file);
|
|
}
|
|
$found_lang = combine_results(\@list,
|
|
$type,
|
|
\%{$Results{ $type }},
|
|
\%Filters_by_Language );
|
|
} else { # get inputs from the command line
|
|
$found_lang = combine_results(\@ARGV,
|
|
$type,
|
|
\%{$Results{ $type }},
|
|
\%Filters_by_Language );
|
|
}
|
|
next unless %Results;
|
|
my $end_time = get_time();
|
|
my @results = generate_report($VERSION, $end_time - $start_time,
|
|
$type,
|
|
\%{$Results{ $type }}, \%Scale_Factor);
|
|
if ($opt_report_file) {
|
|
my $ext = ".lang";
|
|
$ext = ".file" unless $type eq "by language";
|
|
next if !$found_lang and $ext eq ".lang";
|
|
write_file($opt_report_file . $ext, {}, @results);
|
|
} else {
|
|
print "\n", join("\n", @results), "\n";
|
|
}
|
|
}
|
|
exit;
|
|
}
|
|
if ($opt_write_lang_def or $opt_write_lang_def_incl_dup) {
|
|
my $file = $opt_write_lang_def;
$file = $opt_write_lang_def_incl_dup if $opt_write_lang_def_incl_dup;
|
|
write_lang_def($file ,
|
|
\%Language_by_Extension,
|
|
\%Language_by_Script ,
|
|
\%Language_by_File ,
|
|
\%Filters_by_Language ,
|
|
\%Not_Code_Extension ,
|
|
\%Not_Code_Filename ,
|
|
\%Scale_Factor ,
|
|
\%EOL_Continuation_re ,
|
|
);
|
|
exit;
|
|
}
|
|
if ($opt_show_os) {
|
|
if ($ON_WINDOWS) {
|
|
print "Windows\n";
|
|
} else {
|
|
print "UNIX\n";
|
|
}
|
|
exit;
|
|
}
|
|
|
|
my $max_processes = get_max_processes();
|
|
|
|
# 1}}}
|
|
# Step 3: Create a list of files to consider. {{{1
|
|
# a) If inputs are binary archives, first cd to a temp
|
|
# directory, expand the archive with the user-given
|
|
# extraction tool, then add the temp directory to
|
|
# the list of dirs to process.
|
|
# b) Create a list of every file that might contain source
|
|
# code. Ignore binary files, zero-sized files, and
|
|
# any file in a directory the user says to exclude.
|
|
# c) Determine the language for each file in the list.
|
|
#
|
|
my @binary_archive = ();
|
|
my $cwd = cwd();
|
|
if ($opt_extract_with) {
|
|
#print "cwd main = [$cwd]\n";
|
|
my @extract_location = ();
|
|
foreach my $bin_file (@ARGV) {
|
|
my $extract_dir = undef;
|
|
if ($opt_sdir) {
|
|
++$TEMP_OFF;
|
|
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
} else {
|
|
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
}
|
|
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
print "mkdir $extract_dir\n" if $opt_v;
|
|
print "cd $extract_dir\n" if $opt_v;
|
|
chdir $extract_dir;
|
|
my $bin_file_full_path = "";
|
|
if (File::Spec->file_name_is_absolute( $bin_file )) {
|
|
$bin_file_full_path = $bin_file;
|
|
#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n";
|
|
} else {
|
|
$bin_file_full_path = File::Spec->catfile( $cwd, $bin_file );
|
|
#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n";
|
|
}
|
|
my $extract_cmd = uncompress_archive_cmd($bin_file_full_path);
|
|
print $extract_cmd, "\n" if $opt_v;
|
|
system $extract_cmd;
|
|
push @extract_location, $extract_dir;
|
|
chdir $cwd;
|
|
}
|
|
# It is possible that the binary archive itself contains additional
|
|
# files compressed the same way (true for Java .ear files). Go
|
|
# through all the files that were extracted, see if they are binary
|
|
# archives and try to extract them. Lather, rinse, repeat.
|
|
my $binary_archives_exist = 1;
|
|
my $count_binary_archives = 0;
|
|
my $previous_count = 0;
|
|
my $n_pass = 0;
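# Keep expanding until a pass turns up no new embedded archives
# (ie, the number found stops changing); each extracted archive is
# unlinked so it isn't expanded again on the next pass.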
while ($binary_archives_exist) {
|
|
@binary_archive = ();
|
|
foreach my $dir (@extract_location) {
|
|
find(\&archive_files, $dir); # populates global @binary_archive
|
|
}
|
|
foreach my $archive (@binary_archive) {
|
|
my $extract_dir = undef;
|
|
if ($opt_sdir) {
|
|
++$TEMP_OFF;
|
|
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
} else {
|
|
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
}
|
|
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
print "mkdir $extract_dir\n" if $opt_v;
|
|
print "cd $extract_dir\n" if $opt_v;
|
|
chdir $extract_dir;
|
|
|
|
my $extract_cmd = uncompress_archive_cmd($archive);
|
|
print $extract_cmd, "\n" if $opt_v;
|
|
system $extract_cmd;
|
|
push @extract_location, $extract_dir;
|
|
unlink $archive; # otherwise will be extracting it forever
|
|
}
|
|
$count_binary_archives = scalar @binary_archive;
|
|
if ($count_binary_archives == $previous_count) {
|
|
$binary_archives_exist = 0;
|
|
}
|
|
$previous_count = $count_binary_archives;
|
|
}
|
|
chdir $cwd;
|
|
|
|
@ARGV = @extract_location;
|
|
} else {
|
|
# see if any of the inputs need to be auto-uncompressed &/or expanded
|
|
my @updated_ARGS = ();
|
|
replace_git_hash_with_tarfile(\@ARGV, \@git_similarity) if $opt_force_git;
|
|
foreach my $Arg (@ARGV) {
|
|
if (is_dir($Arg)) {
|
|
push @updated_ARGS, $Arg;
|
|
next;
|
|
}
|
|
my $full_path = "";
|
|
if (File::Spec->file_name_is_absolute( $Arg )) {
|
|
$full_path = $Arg;
|
|
} else {
|
|
$full_path = File::Spec->catfile( $cwd, $Arg );
|
|
}
|
|
#print "full_path = [$full_path]\n";
|
|
my $extract_cmd = uncompress_archive_cmd($full_path);
|
|
if ($extract_cmd) {
|
|
my $extract_dir = undef;
|
|
if ($opt_sdir) {
|
|
++$TEMP_OFF;
|
|
$extract_dir = "$opt_sdir/$TEMP_OFF";
|
|
File::Path::rmtree($extract_dir) if is_dir($extract_dir);
|
|
File::Path::mkpath($extract_dir) unless is_dir($extract_dir);
|
|
} else {
|
|
$extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
}
|
|
$TEMP_DIR{ $extract_dir } = 1 if $opt_sql or $opt_by_file;
|
|
print "mkdir $extract_dir\n" if $opt_v;
|
|
print "cd $extract_dir\n" if $opt_v;
|
|
chdir $extract_dir;
|
|
print $extract_cmd, "\n" if $opt_v;
|
|
system $extract_cmd;
|
|
push @updated_ARGS, $extract_dir;
|
|
chdir $cwd;
|
|
} else {
|
|
# this is a conventional, uncompressed, unarchived file
|
|
# or a directory; keep as-is
|
|
push @updated_ARGS, $Arg;
|
|
}
|
|
}
|
|
@ARGV = @updated_ARGS;
|
|
|
|
# make sure we're not counting any directory containing
|
|
# temporary installations of Regexp::Common, Algorithm::Diff
|
|
foreach my $d (sort keys %TEMP_INST) {
|
|
foreach my $a (@ARGV) {
|
|
next unless is_dir($a);
|
|
if ($opt_v > 2) {
|
|
printf "Comparing %s (location of %s) to input [%s]\n",
|
|
$d, $TEMP_INST{$d}, $a;
|
|
}
|
|
if ($a eq $d) {
|
|
die "File::Temp::tempdir chose directory ",
|
|
$d, " to install ", $TEMP_INST{$d}, " but this ",
|
|
"matches one of your input directories. Rerun ",
|
|
"with --sdir and supply a different temporary ",
|
|
"directory for ", $TEMP_INST{$d}, "\n";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
# 1}}}
|
|
my @Errors = ();
|
|
my @file_list = (); # global variable updated in files()
|
|
my %Ignored = (); # files that are not counted (language not recognized or
|
|
# problems reading the file)
|
|
my @Lines_Out = ();
|
|
my %upper_lower_map = (); # global variable (needed only on Windows) to
|
|
# track case of original filename, populated in
|
|
# make_file_list() if $ON_WINDOWS
|
|
if ($opt_diff) {
|
|
# Step 4: Separate code from non-code files. {{{1
|
|
my @fh = ();
|
|
my @files_for_set = ();
|
|
my @files_added_tot = ();
|
|
my @files_removed_tot = ();
|
|
my @file_pairs_tot = ();
|
|
# make file lists for each separate argument
|
|
if ($opt_diff_list_file) {
|
|
@files_for_set = ( (), () );
|
|
file_pairs_from_file($opt_diff_list_file, # in
|
|
\@files_added_tot , # out
|
|
\@files_removed_tot , # out
|
|
\@file_pairs_tot , # out
|
|
);
|
|
foreach my $F (@files_added_tot) {
|
|
if ($ON_WINDOWS) {
|
|
(my $lc = lc $F) =~ s{\\}{/}g;
|
|
$upper_lower_map{$lc} = $F;
|
|
$F = $lc;
|
|
}
|
|
push @{$files_for_set[1]}, $F;
|
|
}
|
|
foreach my $F (@files_removed_tot) {
|
|
if ($ON_WINDOWS) {
|
|
(my $lc = lc $F) =~ s{\\}{/}g;
|
|
$upper_lower_map{$lc} = $F;
|
|
$F = $lc;
|
|
}
|
|
push @{$files_for_set[0]}, $F;
|
|
}
|
|
foreach my $pair (@file_pairs_tot) {
|
|
if ($ON_WINDOWS) {
|
|
push @{$files_for_set[0]}, lc $pair->[0];
|
|
push @{$files_for_set[1]}, lc $pair->[1];
|
|
} else {
|
|
push @{$files_for_set[0]}, $pair->[0];
|
|
push @{$files_for_set[1]}, $pair->[1];
|
|
}
|
|
}
|
|
@ARGV = (1, 2); # place holders
|
|
}
|
|
for (my $i = 0; $i < scalar @ARGV; $i++) {
|
|
if ($opt_diff_list_file) {
|
|
push @fh, make_file_list($files_for_set[$i], $i+1,
|
|
\%Error_Codes, \@Errors, \%Ignored);
|
|
@{$files_for_set[$i]} = @file_list;
|
|
} elsif ($opt_diff_list_files) {
|
|
my @list_files = read_list_file($ARGV[$i]);
|
|
push @fh, make_file_list(\@list_files, $i+1,
|
|
\%Error_Codes, \@Errors, \%Ignored);
|
|
@{$files_for_set[$i]} = @file_list;
|
|
} else {
|
|
push @fh, make_file_list([ $ARGV[$i] ], $i+1,
|
|
\%Error_Codes, \@Errors, \%Ignored);
|
|
@{$files_for_set[$i]} = @file_list;
|
|
}
|
|
if ($opt_exclude_list_file) {
|
|
# note: process_exclude_list_file() references global @file_list
|
|
process_exclude_list_file($opt_exclude_list_file,
|
|
\%Exclude_Dir,
|
|
\%Ignored);
|
|
}
|
|
if ($opt_no_autogen) {
|
|
exclude_autogenerated_files(\@{$files_for_set[$i]}, # in/out
|
|
\%Error_Codes, \@Errors, \%Ignored);
|
|
}
|
|
@file_list = ();
|
|
}
|
|
# 1}}}
|
|
# Step 5: Remove duplicate files. {{{1
|
|
#
|
|
my %Language = ();
|
|
my %unique_source_file = ();
|
|
my $n_set = 0;
|
|
foreach my $FH (@fh) { # loop over each pair of file sets
|
|
++$n_set;
|
|
remove_duplicate_files($FH,
|
|
\%{$Language{$FH}} ,
|
|
\%{$unique_source_file{$FH}} ,
|
|
\%Error_Codes ,
|
|
\@Errors ,
|
|
\%Ignored );
|
|
if ($opt_exclude_content) {
|
|
exclude_by_regex($opt_exclude_content, # in
|
|
\%{$unique_source_file{$FH}}, # in/out
|
|
\%Ignored); # out
|
|
} elsif ($opt_include_content) {
|
|
include_by_regex($opt_include_content, # in
|
|
\%{$unique_source_file{$FH}}, # in/out
|
|
\%Ignored); # out
|
|
}
|
|
|
|
if ($opt_include_lang) {
|
|
# remove files associated with languages not
|
|
# specified by --include-lang
|
|
my @delete_file = ();
|
|
foreach my $file (keys %{$unique_source_file{$FH}}) {
|
|
my $keep_file = 0;
|
|
foreach my $keep_lang (keys %Include_Language) {
|
|
if (lc($Language{$FH}{$file}) eq $keep_lang) {
|
|
$keep_file = 1;
|
|
last;
|
|
}
|
|
}
|
|
next if $keep_file;
|
|
push @delete_file, $file;
|
|
}
|
|
foreach my $file (@delete_file) {
|
|
delete $Language{$FH}{$file};
|
|
}
|
|
}
|
|
|
|
printf "%2d: %8d unique file%s. \r",
|
|
$n_set,
|
|
plural_form(scalar keys %unique_source_file)
|
|
unless $opt_quiet;
|
|
}
|
|
# 1}}}
|
|
# Step 6: Count code, comments, blank lines. {{{1
|
|
#
|
|
my %Results_by_Language = ();
|
|
my %Results_by_File = ();
|
|
my %Delta_by_Language = ();
|
|
my %Delta_by_File = ();
|
|
|
|
my %alignment = ();
|
|
|
|
my $fset_a = $fh[0];
|
|
my $fset_b = $fh[1];
|
|
|
|
my $n_filepairs_compared = 0;
|
|
my $tot_counted = 0;
|
|
|
|
if ( scalar @fh != 2 ) {
|
|
print "Error: incorrect length fh array when preparing diff at step 6.\n";
|
|
exit 1;
|
|
}
|
|
if (!$opt_diff_list_file) {
|
|
align_by_pairs(\%{$unique_source_file{$fset_a}} , # in
|
|
\%{$unique_source_file{$fset_b}} , # in
|
|
\@files_added_tot , # out
|
|
\@files_removed_tot , # out
|
|
\@file_pairs_tot , # out
|
|
);
|
|
}
|
|
|
|
#use Data::Dumper;
|
|
#print "added : ", Dumper(\@files_added_tot);
|
|
#print "removed : ", Dumper(\@files_removed_tot);
|
|
#print "pairs : ", Dumper(\@file_pairs_tot);
|
|
|
|
if ( $max_processes == 0) {
|
|
# Multiprocessing is disabled
|
|
my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
|
|
\@files_removed_tot, \@file_pairs_tot,
|
|
0, \%Language, \%Ignored);
|
|
%Results_by_File = %{$part->{'results_by_file'}};
|
|
%Results_by_Language= %{$part->{'results_by_language'}};
|
|
%Delta_by_File = %{$part->{'delta_by_file'}};
|
|
%Delta_by_Language= %{$part->{'delta_by_language'}};
|
|
%Ignored = ( %Ignored, %{$part->{'ignored'}});
|
|
%alignment = %{$part->{'alignment'}};
|
|
$n_filepairs_compared = $part->{'n_filepairs_compared'};
|
|
push ( @Errors, @{$part->{'errors'}});
|
|
} else {
|
|
# Multiprocessing is enabled
|
|
# Do not create more processes than the amount of data to be processed
|
|
my $num_processes = min(max(scalar @files_added_tot,
|
|
scalar @files_removed_tot,
|
|
scalar @file_pairs_tot),
|
|
$max_processes);
|
|
# ... but use at least one process.
|
|
$num_processes = 1
|
|
if $num_processes == 0;
|
|
# Start processes for counting
|
|
my $pm = Parallel::ForkManager->new($num_processes);
|
|
# When processes finish, they will use the embedded subroutine for
|
|
# merging the data into global variables.
|
|
$pm->run_on_finish ( sub {
|
|
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
|
|
my $part_ignored = $part->{'ignored'};
|
|
my $part_result_by_file = $part->{'results_by_file'};
|
|
my $part_result_by_language = $part->{'results_by_language'};
|
|
my $part_delta_by_file = $part->{'delta_by_file'};
|
|
my $part_delta_by_language = $part->{'delta_by_language'};
|
|
my $part_alignment = $part->{'alignment'};
|
|
my $part_errors = $part->{'errors'};
|
|
$tot_counted += scalar keys %$part_result_by_file;
|
|
$n_filepairs_compared += $part->{'n_filepairs_compared'};
|
|
# Since files are processed by multiple processes, we can't measure
|
|
# the number of processed files exactly. We approximate this by showing
|
|
# the number of files counted by finished processes.
|
|
printf "Counting: %d\r", $tot_counted
|
|
if $opt_progress_rate;
|
|
|
|
foreach my $this_language ( keys %$part_result_by_language ) {
|
|
my $counts = $part_result_by_language->{$this_language};
|
|
foreach my $inner_key ( keys %$counts ) {
|
|
$Results_by_Language{$this_language}{$inner_key} +=
|
|
$counts->{$inner_key};
|
|
}
|
|
}
|
|
|
|
foreach my $this_language ( keys %$part_delta_by_language ) {
|
|
my $counts = $part_delta_by_language->{$this_language};
|
|
foreach my $inner_key ( keys %$counts ) {
|
|
my $statuses = $counts->{$inner_key};
|
|
foreach my $inner_status ( keys %$statuses ) {
|
|
$Delta_by_Language{$this_language}{$inner_key}{$inner_status} +=
|
|
$counts->{$inner_key}->{$inner_status};
|
|
}
|
|
}
|
|
}
|
|
|
|
foreach my $label ( keys %$part_alignment ) {
|
|
my $inner = $part_alignment->{$label};
|
|
foreach my $key ( keys %$inner ) {
|
|
$alignment{$label}{$key} = 1;
|
|
}
|
|
}
|
|
|
|
%Results_by_File = ( %Results_by_File, %$part_result_by_file );
|
|
%Delta_by_File = ( %Delta_by_File, %$part_delta_by_file );
|
|
%Ignored = (%Ignored, %$part_ignored );
|
|
push ( @Errors, @$part_errors );
|
|
} );
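# Split the added, removed, and paired file lists into roughly equal
# chunks, one chunk per child process; splice() consumes the *_tot
# arrays in place until all three are empty.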
my $num_filepairs_per_part = ceil ( ( scalar @file_pairs_tot ) / $num_processes );
|
|
my $num_filesremoved_per_part = ceil ( ( scalar @files_removed_tot ) / $num_processes );
|
|
my $num_filesadded_per_part = ceil ( ( scalar @files_added_tot ) / $num_processes );
|
|
|
|
while ( 1 ) {
|
|
my @files_added_part = splice @files_added_tot, 0, $num_filesadded_per_part;
|
|
my @files_removed_part = splice @files_removed_tot, 0, $num_filesremoved_per_part;
|
|
my @filepairs_part = splice @file_pairs_tot, 0, $num_filepairs_per_part;
|
|
if ( scalar @files_added_part == 0 and scalar @files_removed_part == 0 and
|
|
scalar @filepairs_part == 0 ) {
|
|
last;
|
|
}
|
|
|
|
$pm->start() and next;
|
|
my $count_result = count_filesets ( $fset_a, $fset_b,
|
|
\@files_added_part, \@files_removed_part,
|
|
\@filepairs_part, 1, \%Language, \%Ignored );
|
|
$pm->finish(0 , $count_result);
|
|
}
|
|
# Wait for processes to finish
|
|
$pm->wait_all_children();
|
|
}
|
|
|
|
# Write alignment data, if needed
|
|
if ($opt_diff_alignment) {
|
|
write_alignment_data ( $opt_diff_alignment, $n_filepairs_compared, \%alignment ) ;
|
|
}
|
|
|
|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
|
|
my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
|
|
write_file($opt_ignored, {"file_type" => "ignored",
|
|
"separator" => ": ",
|
|
"columns" => ["file", "reason"],
|
|
}, @ignored_reasons ) if $opt_ignored;
|
|
write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
|
|
# 1}}}
|
|
# Step 7: Assemble results. {{{1
|
|
#
|
|
my $end_time = get_time();
|
|
printf "%8d file%s ignored. \n",
|
|
plural_form(scalar keys %Ignored) unless $opt_quiet;
|
|
print_errors(\%Error_Codes, \@Errors) if @Errors;
|
|
if (!%Delta_by_Language) {
|
|
print "Nothing to count.\n";
|
|
exit;
|
|
}
|
|
|
|
if ($opt_by_file) {
|
|
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
|
|
"by file",
|
|
\%Delta_by_File, \%Scale_Factor);
|
|
} else {
|
|
@Lines_Out = diff_report($VERSION, get_time() - $start_time,
|
|
"by language",
|
|
\%Delta_by_Language, \%Scale_Factor);
|
|
}
|
|
|
|
# 1}}}
|
|
} else {
|
|
# Step 4: Separate code from non-code files. {{{1
|
|
my $fh = 0;
|
|
if ($opt_list_file or $opt_diff_list_files or $opt_vcs) {
|
|
my @list;
|
|
if ($opt_vcs) {
|
|
@list = invoke_generator($opt_vcs, \@ARGV);
|
|
} elsif ($opt_list_file) {
|
|
@list = read_list_file($opt_list_file);
|
|
} else {
|
|
@list = read_list_file($ARGV[0]);
|
|
}
|
|
$fh = make_file_list(\@list, 0, \%Error_Codes, \@Errors, \%Ignored);
|
|
} else {
|
|
$fh = make_file_list(\@ARGV, 0, \%Error_Codes, \@Errors, \%Ignored);
|
|
# make_file_list populates global variable @file_list via call to
|
|
# File::Find's find() which in turn calls files()
|
|
}
|
|
if ($opt_exclude_list_file) {
|
|
# note: process_exclude_list_file() references global @file_list
|
|
process_exclude_list_file($opt_exclude_list_file,
|
|
\%Exclude_Dir,
|
|
\%Ignored);
|
|
}
|
|
if ($opt_skip_win_hidden and $ON_WINDOWS) {
|
|
my @file_list_minus_hidden = ();
|
|
# eval code to run on Unix without 'missing Win32::File module' error.
|
|
my $win32_file_invocation = '
|
|
use Win32::File;
|
|
foreach my $F (@file_list) {
|
|
my $attr = undef;
|
|
Win32::File::GetAttributes($F, $attr);
|
|
if ($attr & HIDDEN) {
|
|
$Ignored{$F} = "Windows hidden file";
|
|
print "Ignoring $F since it is a Windows hidden file\n"
|
|
if $opt_v > 1;
|
|
} else {
|
|
push @file_list_minus_hidden, $F;
|
|
}
|
|
}';
|
|
eval $win32_file_invocation;
|
|
@file_list = @file_list_minus_hidden;
|
|
}
|
|
if ($opt_no_autogen) {
|
|
exclude_autogenerated_files(\@file_list, # in/out
|
|
\%Error_Codes, \@Errors, \%Ignored);
|
|
}
|
|
#printf "%8d file%s excluded. \n",
|
|
# plural_form(scalar keys %Ignored)
|
|
# unless $opt_quiet;
|
|
# die print ": ", join("\n: ", @file_list), "\n";
|
|
# 1}}}
|
|
# Step 5: Remove duplicate files. {{{1
|
|
#
|
|
my %Language = ();
|
|
my %unique_source_file = ();
|
|
remove_duplicate_files($fh , # in
|
|
\%Language , # out
|
|
\%unique_source_file , # out
|
|
\%Error_Codes , # in
|
|
\@Errors , # out
|
|
\%Ignored ); # out
|
|
if ($opt_exclude_content) {
|
|
exclude_by_regex($opt_exclude_content, # in
|
|
\%unique_source_file , # in/out
|
|
\%Ignored); # out
|
|
} elsif ($opt_include_content) {
|
|
include_by_regex($opt_include_content, # in
|
|
\%unique_source_file , # in/out
|
|
\%Ignored); # out
|
|
}
|
|
printf "%8d unique file%s. \n",
|
|
plural_form(scalar keys %unique_source_file)
|
|
unless $opt_quiet;
|
|
# 1}}}
|
|
# Step 6: Count code, comments, blank lines. {{{1
|
|
#
|
|
my %Results_by_Language = ();
|
|
my %Results_by_File = ();
|
|
my @results_parts = ();
|
|
my @sorted_files = sort keys %unique_source_file;
|
|
|
|
if ( $max_processes == 0) {
|
|
# Multiprocessing is disabled
|
|
my $part = count_files ( \@sorted_files , 0, \%Language);
|
|
%Results_by_File = %{$part->{'results_by_file'}};
|
|
%Results_by_Language= %{$part->{'results_by_language'}};
|
|
%Ignored = ( %Ignored, %{$part->{'ignored'}});
|
|
push ( @Errors, @{$part->{'errors'}});
|
|
} else {
|
|
# Do not create more processes than the number of files to be processed
|
|
my $num_files = scalar @sorted_files;
|
|
my $num_processes = $num_files >= $max_processes ? $max_processes : $num_files;
|
|
# Use at least one process.
|
|
$num_processes = 1
|
|
if $num_processes == 0;
|
|
# Start processes for counting
|
|
my $pm = Parallel::ForkManager->new($num_processes);
|
|
# When processes finish, they will use the embedded subroutine for
|
|
# merging the data into global variables.
|
|
$pm->run_on_finish ( sub {
|
|
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $part) = @_;
|
|
my $part_ignored = $part->{'ignored'};
|
|
my $part_result_by_file = $part->{'results_by_file'};
|
|
my $part_result_by_language = $part->{'results_by_language'};
|
|
my $part_errors = $part->{'errors'};
|
|
$nCounted += scalar keys %$part_result_by_file;
|
|
# Since files are processed by multiple processes, we can't measure
|
|
# the number of processed files exactly. We approximate this by showing
|
|
# the number of files counted by finished processes.
|
|
printf "Counting: %d\r", $nCounted
|
|
if $opt_progress_rate;
|
|
|
|
foreach my $this_language ( keys %$part_result_by_language ) {
|
|
my $counts = $part_result_by_language->{$this_language};
|
|
foreach my $inner_key ( keys %$counts ) {
|
|
$Results_by_Language{$this_language}{$inner_key} +=
|
|
$counts->{$inner_key};
|
|
}
|
|
}
|
|
%Results_by_File = ( %Results_by_File, %$part_result_by_file );
|
|
%Ignored = (%Ignored, %$part_ignored);
|
|
push ( @Errors, @$part_errors);
|
|
} );
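# Hand the sorted file list to the workers in contiguous chunks;
# splice() consumes @sorted_files until nothing is left.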
my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
|
|
while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
|
|
$pm->start() and next;
|
|
my $count_result = count_files ( \@part, 1, \%Language );
|
|
$pm->finish(0 , $count_result);
|
|
}
|
|
# Wait for processes to finish
|
|
$pm->wait_all_children();
|
|
}
|
|
|
|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ": ";
|
|
my @ignored_reasons = map { "${_}${separator} $Ignored{$_}" } sort keys %Ignored;
|
|
write_file($opt_ignored, {"file_type" => "ignored",
|
|
"separator" => $separator,
|
|
"columns" => ["file", "reason"],
|
|
}, @ignored_reasons ) if $opt_ignored;
|
|
if ($opt_summary_cutoff) {
|
|
%Results_by_Language = apply_cutoff($opt_summary_cutoff,
|
|
\%Results_by_Language);
|
|
}
|
|
write_file($opt_counted, {}, sort keys %Results_by_File) if $opt_counted;
|
|
# 1}}}
|
|
# Step 7: Assemble results. {{{1
|
|
#
|
|
my $end_time = get_time();
|
|
printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored)
|
|
unless $opt_quiet;
|
|
print_errors(\%Error_Codes, \@Errors) if @Errors;
|
|
if (!%Results_by_Language) {
|
|
write_null_results($opt_json, $opt_xml, $opt_report_file);
|
|
exit;
|
|
}
|
|
|
|
generate_sql($end_time - $start_time,
|
|
\%Results_by_File, \%Scale_Factor) if $opt_sql;
|
|
|
|
exit if $skip_generate_report;
|
|
if ($opt_by_file_by_lang) {
|
|
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
"by file",
|
|
\%Results_by_File, \%Scale_Factor);
|
|
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
"by language",
|
|
\%Results_by_Language, \%Scale_Factor);
|
|
} elsif ($opt_by_file) {
|
|
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
"by file",
|
|
\%Results_by_File, \%Scale_Factor);
|
|
} else {
|
|
push @Lines_Out, generate_report( $VERSION, $end_time - $start_time,
|
|
"by language",
|
|
\%Results_by_Language, \%Scale_Factor);
|
|
}
|
|
# 1}}}
|
|
}
|
|
if ($opt_fmt) {
|
|
my $json_string = "";
|
|
write_file(\$json_string, {}, @Lines_Out);
|
|
my ($file_len, $lang_len, $header, %contents) = load_json($json_string);
|
|
@Lines_Out = print_format_n(abs($opt_fmt), $file_len, $lang_len, $header, %contents);
|
|
}
|
|
if ($opt_report_file) {
|
|
write_file($opt_report_file, {}, @Lines_Out);
|
|
} else {
|
|
if ($opt_fmt) {
|
|
print "@Lines_Out";
|
|
} else {
|
|
print "\n" unless $opt_quiet;
|
|
print join("\n", @Lines_Out), "\n";
|
|
}
|
|
}
|
|
if ($opt_count_diff) {
|
|
++$opt_count_diff;
|
|
exit if $opt_count_diff > 3;
|
|
goto Top_of_Processing_Loop;
|
|
}
|
|
sub summary_cutoff_error { # {{{
|
|
my ($parameter) = @_;
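# Returns an empty string if the --summary-cutoff argument is well
# formed, otherwise a string describing the problem.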
print "-> summary_cutoff_is_ok($parameter)\n" if $opt_v > 2;
|
|
my %known_keys = ( 'c' => 1, 'f' => 1, 'm' => 1, 'cm' => 1 );
|
|
my $result = "";
|
|
my $by_pct = 0;
|
|
my ($key, $value);
|
|
if ($parameter !~ /:/) {
|
|
$result = "expected a colon in --summary-cutoff argument";
|
|
} else {
|
|
($key, $value) = split(':', $parameter, 2);
|
|
if ($value =~ /%$/) {
|
|
$by_pct = 1;
|
|
$value =~ s/%$//;
|
|
}
|
|
if (!$known_keys{$key}) {
|
|
$result = "--summary-cutoff argument: '$key' is not 'c', 'f', 'm' or 'cm'";
|
|
}
|
|
if ($value !~ /^\d+(\.\d*)?$/) {
|
|
$result = "--summary-cutoff argument: '$value' is not a number";
|
|
}
|
|
}
|
|
print "<- summary_cutoff_is_ok($result)\n" if $opt_v > 2;
|
|
return $result;
|
|
} # 1}}}
|
|
sub apply_cutoff { # {{{1
|
|
my ($criterion,
|
|
$rhh_by_lang) = @_;
|
|
|
|
my %aggregated_Results_by_Language = ();
|
|
my $by_pct = 0;
|
|
my ($key, $value) = split(':', $criterion, 2);
|
|
if ($value =~ /%$/) {
|
|
$by_pct = 1;
|
|
$value =~ s/%$//;
|
|
}
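# A trailing '%' makes the cutoff relative: total the counts over all
# languages, then turn the percentage into an absolute threshold for
# the chosen metric (c=code, f=files, m=comments, cm=code+comments).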
my %sum = ();
|
|
if ($by_pct) {
|
|
foreach my $lang (keys %{$rhh_by_lang}) {
|
|
foreach my $category (qw(nFiles comment blank code)) {
|
|
$sum{$category} += $rhh_by_lang->{$lang}{$category};
|
|
}
|
|
}
|
|
if ($key eq 'c') {
|
|
$value *= $sum{'code'}/100;
|
|
} elsif ($key eq 'f') {
|
|
$value *= $sum{'nFiles'}/100;
|
|
} elsif ($key eq 'm') {
|
|
$value *= $sum{'comment'}/100;
|
|
} elsif ($key eq 'cm') {
|
|
$value *= ($sum{'code'} + $sum{'comment'})/100;
|
|
}
|
|
}
|
|
|
|
foreach my $lang (keys %{$rhh_by_lang}) {
|
|
my %sum = ();
|
|
my $agg_lang = $lang;
|
|
if ($key eq 'c') {
|
|
$agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'} <= $value;
|
|
} elsif ($key eq 'f') {
|
|
$agg_lang = 'Other' if $rhh_by_lang->{$lang}{'nFiles'} <= $value;
|
|
} elsif ($key eq 'm') {
|
|
$agg_lang = 'Other' if $rhh_by_lang->{$lang}{'comment'} <= $value;
|
|
} elsif ($key eq 'cm') {
|
|
$agg_lang = 'Other' if $rhh_by_lang->{$lang}{'code'} +
|
|
$rhh_by_lang->{$lang}{'comment'} <= $value;
|
|
}
|
|
foreach my $category (qw(nFiles comment blank code)) {
|
|
$aggregated_Results_by_Language{$agg_lang}{$category} +=
|
|
$rhh_by_lang->{$lang}{$category};
|
|
}
|
|
}
|
|
|
|
return %aggregated_Results_by_Language;
|
|
} # 1}}}
|
|
sub exclude_by_regex { # {{{1
|
|
my ($regex,
|
|
$rh_unique_source_file, # in/out
|
|
$rh_ignored , # out
|
|
) = @_;
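# Drop a file (and record why in the ignore report) as soon as any of
# its lines matches the regex.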
my @exclude = ();
|
|
foreach my $file (keys %{$rh_unique_source_file}) {
|
|
my $line_num = 0;
|
|
foreach my $line (read_file($file)) {
|
|
++$line_num;
|
|
if ($line =~ /$regex/) {
|
|
$rh_ignored->{$file} = "line $line_num match for --exclude-content=$regex";
|
|
push @exclude, $file;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
foreach my $file (@exclude) {
|
|
delete $rh_unique_source_file->{$file};
|
|
}
|
|
} # 1}}}
|
|
sub include_by_regex { # {{{1
|
|
my ($regex,
|
|
$rh_unique_source_file, # in/out
|
|
$rh_ignored , # out
|
|
) = @_;
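# Keep a file only if at least one of its lines matches the regex;
# everything else is dropped and recorded in the ignore report.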
my @exclude = ();
|
|
foreach my $file (keys %{$rh_unique_source_file}) {
|
|
my $keep_this_one = 0;
|
|
foreach my $line (read_file($file)) {
|
|
if ($line =~ /$regex/) {
|
|
$keep_this_one = 1;
|
|
last;
|
|
}
|
|
}
|
|
if (!$keep_this_one) {
|
|
$rh_ignored->{$file} = "does not satisfy --include-content=$regex";
|
|
push @exclude, $file;
|
|
}
|
|
}
|
|
foreach my $file (@exclude) {
|
|
delete $rh_unique_source_file->{$file};
|
|
}
|
|
} # 1}}}
|
|
sub get_max_processes { # {{{1
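# Decide how many worker processes to use: honor a valid --processes=N,
# otherwise default to the core count on Linux/macOS when
# Parallel::ForkManager is available, and to 0 (no forking) elsewhere.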
# If user has specified valid number of processes, use that.
|
|
if (defined $opt_processes) {
|
|
eval "use Parallel::ForkManager 0.7.6;";
|
|
if ( defined $Parallel::ForkManager::VERSION ) {
|
|
$HAVE_Parallel_ForkManager = 1;
|
|
}
|
|
if ( $opt_processes !~ /^\d+$/ ) {
|
|
print "Error: processes option argument must be numeric.\n";
|
|
exit 1;
|
|
} elsif ( $opt_processes >0 and ! $HAVE_Parallel_ForkManager ) {
|
|
print "Error: cannot use multiple processes, because " .
|
|
"Parallel::ForkManager is not installed, or the version is too old.\n";
|
|
exit 1;
|
|
} elsif ( $opt_processes >0 and $ON_WINDOWS ) {
|
|
print "Error: cannot use multiple processes on Windows systems.\n";
|
|
exit 1;
|
|
} else {
|
|
return $opt_processes;
|
|
}
|
|
}
|
|
|
|
# Disable multiprocessing on Windows - does not work reliably
|
|
if ($ON_WINDOWS) {
|
|
return 0;
|
|
}
|
|
|
|
# Disable multiprocessing if Parallel::ForkManager is not available
|
|
if ( ! $HAVE_Parallel_ForkManager ) {
|
|
return 0;
|
|
}
|
|
|
|
# Set to number of cores on Linux
|
|
if ( $^O =~ /linux/i and -x '/usr/bin/nproc' ) {
|
|
my $numavcores_linux = `/usr/bin/nproc`;
|
|
chomp $numavcores_linux;
|
|
if ( $numavcores_linux =~ /^\d+$/ ) {
|
|
return $numavcores_linux;
|
|
}
|
|
}
|
|
|
|
# Set to number of cores on macOS
|
|
if ( $^O =~ /darwin/i and -x '/usr/sbin/sysctl') {
|
|
my $numavcores_macos = `/usr/sbin/sysctl -n hw.physicalcpu`;
|
|
chomp $numavcores_macos;
|
|
if ($numavcores_macos =~ /^\d+$/ ) {
|
|
return $numavcores_macos;
|
|
}
|
|
}
|
|
|
|
# Disable multiprocessing in other cases
|
|
return 0;
|
|
} # 1}}}
|
|
sub exclude_autogenerated_files { # {{{1
|
|
my ($ra_file_list, # in/out
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rh_Ignored , # out
|
|
) = @_;
|
|
print "-> exclude_autogenerated_files()\n" if $opt_v > 2;
|
|
my @file_list_minus_autogen = ();
|
|
foreach my $file (@{$ra_file_list}) {
|
|
if ($file !~ /\.go$/ && $file !~ /\.ʕ◔ϖ◔ʔ$/) {
|
|
# at the moment, only know Go autogenerated files
|
|
push @file_list_minus_autogen, $file;
|
|
next;
|
|
}
|
|
my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
|
|
if ($first_line =~ m{^//\s+Code\s+generated\s+.*?\s+DO\s+NOT\s+EDIT\.$}) {
|
|
$rh_Ignored->{$file} = 'Go autogenerated file';
|
|
} else {
|
|
# Go, but not autogenerated
|
|
push @file_list_minus_autogen, $file;
|
|
}
|
|
}
|
|
@{$ra_file_list} = @file_list_minus_autogen;
|
|
|
|
if ($opt_force_git) {
|
|
my $repo_dir = git_root_dir();
|
|
my @file_list_minus_linguist = ();
|
|
# if there's a .gitattributes file, look for linguist-generated
|
|
# and linguist-vendored entries to ignore
|
|
my $GA = ".gitattributes";
|
|
if (-f $GA) {
|
|
foreach my $line (read_file($GA)) {
|
|
next unless $line =~ /^(.*?)\s+(linguist-(vendored|generated))/;
|
|
my $re = glob2regex($1);
|
|
foreach my $file (@{$ra_file_list}) {
|
|
my $full_path = File::Spec->catfile($repo_dir, $file);
|
|
my $rel_file = File::Spec->abs2rel($full_path, $cwd);
|
|
my $match = undef;
|
|
if ($ON_WINDOWS) {
|
|
$rel_file =~ s{\\}{/}g;
|
|
$match = $rel_file =~ m{$re}i;
|
|
} else {
|
|
$match = $rel_file =~ m{$re};
|
|
}
|
|
if ($match) {
|
|
#print "RULE [$rel_file] v [$re]\n";
|
|
$rh_Ignored->{$file} = "matches $GA rule '$line'";
|
|
} else {
|
|
push @file_list_minus_linguist, $file;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
print "<- exclude_autogenerated_files()\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub git_root_dir { # {{{1
|
|
# if in a git repo, return the repo's top level directory
|
|
my $cmd = "git rev-parse --show-toplevel";
|
|
print $cmd, "\n" if $opt_v > 1;
|
|
my $dir = undef;
|
|
chomp($dir = `$cmd`);
|
|
die "Not in a git repository" unless $dir
|
|
} # 1}}}
|
|
sub file_extension {                         # {{{1
    my ($fname, ) = @_;
    if ($fname =~ m/\.(\w+)$/) {
        return $1;
    } else {
        return "";
    }
} # 1}}}
|
|
sub count_files { # {{{1
|
|
my ($filelist, $counter_type, $language_hash) = @_;
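# $counter_type is 0 when running single-process (progress is printed
# here) and 1 when running inside a forked worker (the parent reports
# progress as workers finish).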
print "-> count_files()\n" if $opt_v > 2;
|
|
my @p_errors = ();
|
|
my %p_ignored = ();
|
|
my %p_rbl = ();
|
|
my %p_rbf = ();
|
|
my %Language = %{$language_hash};
|
|
|
|
foreach my $file (@$filelist) {
|
|
if ( ! $counter_type ) {
|
|
# Multithreading disabled
|
|
$nCounted++;
|
|
|
|
printf "Counting: %d\r", $nCounted
|
|
unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate));
|
|
}
|
|
|
|
next if $Ignored{$file};
|
|
if ($opt_include_ext and not $Include_Ext{ file_extension($file) }) {
|
|
$p_ignored{$file} = "not in --include-ext=$opt_include_ext";
|
|
next;
|
|
}
|
|
if ($opt_include_lang and not $Include_Language{lc($Language{$file})}) {
|
|
$p_ignored{$file} = "not in --include-lang=$opt_include_lang";
|
|
next;
|
|
}
|
|
if ($Exclude_Language{$Language{$file}}) {
|
|
$p_ignored{$file} = "--exclude-lang=$Language{$file}";
|
|
next;
|
|
}
|
|
if ($opt_force_lang_def and ($Language{$file} eq "XML") and
|
|
!defined $Filters_by_Language{XML}) {
|
|
# XML check is attempted for all unidentified text files.
|
|
# This can't be done if user forces language definition
|
|
# that excludes XML. GH #596
|
|
next;
|
|
}
|
|
|
|
my $Filters_by_Language_Language_file = ! @{$Filters_by_Language{$Language{$file}} };
|
|
if ($Filters_by_Language_Language_file) {
|
|
if ($Language{$file} eq "(unknown)") {
|
|
$p_ignored{$file} = "language unknown (#1)";
|
|
} else {
|
|
$p_ignored{$file} = "missing Filters_by_Language{$Language{$file}}";
|
|
}
|
|
next;
|
|
}
|
|
|
|
my ($all_line_count, $blank_count, $comment_count, $code_count) = (0, 0, 0, 0);
|
|
if (!$opt_only_count_files) {
|
|
if ($opt_use_sloccount and $Language{$file} =~ /^(C|C\+\+|XML|PHP|Pascal|Java)$/) {
|
|
chomp ($blank_count = `grep -cv \"[^[:space:]]\" '$file'`);
|
|
chomp ($all_line_count = `cat '$file' | wc -l`);
|
|
if ($Language{$file} =~ /^(C|C\+\+)$/) {
|
|
$code_count = `cat '$file' | c_count | head -n 1`;
|
|
} elsif ($Language{$file} eq "XML") {
|
|
$code_count = `cat '$file' | xml_count | head -n 1`;
|
|
} elsif ($Language{$file} eq "PHP") {
|
|
$code_count = `cat '$file' | php_count | head -n 1`;
|
|
} elsif ($Language{$file} eq "Pascal") {
|
|
$code_count = `cat '$file' | pascal_count | head -n 1`;
|
|
} elsif ($Language{$file} eq "Java") {
|
|
$code_count = `cat '$file' | java_count | head -n 1`;
|
|
} else {
|
|
die "SLOCCount match failure: file=[$file] lang=[$Language{$file}]";
|
|
}
|
|
$code_count = substr($code_count, 0, -2);
|
|
$comment_count = $all_line_count - $code_count - $blank_count;
|
|
} else {
|
|
($all_line_count,
|
|
$blank_count ,
|
|
$comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
|
|
$code_count = $all_line_count - $blank_count - $comment_count;
|
|
}
|
|
}
|
|
|
|
if ($opt_by_file) {
|
|
$p_rbf{$file}{'code' } = $code_count ;
|
|
$p_rbf{$file}{'blank' } = $blank_count ;
|
|
$p_rbf{$file}{'comment'} = $comment_count ;
|
|
$p_rbf{$file}{'lang' } = $Language{$file};
|
|
$p_rbf{$file}{'nFiles' } = 1;
|
|
} else {
|
|
$p_rbf{$file} = 1; # just keep track of counted files
|
|
}
|
|
|
|
$p_rbl{$Language{$file}}{'nFiles'}++;
|
|
$p_rbl{$Language{$file}}{'code'} += $code_count ;
|
|
$p_rbl{$Language{$file}}{'blank'} += $blank_count ;
|
|
$p_rbl{$Language{$file}}{'comment'} += $comment_count;
|
|
|
|
}
|
|
print "<- count_files()\n" if $opt_v > 2;
|
|
return {
|
|
"ignored" => \%p_ignored,
|
|
"errors" => \@p_errors,
|
|
"results_by_file" => \%p_rbf,
|
|
"results_by_language" => \%p_rbl,
|
|
}
|
|
} # 1}}}
|
|
sub count_filesets { # {{{1
|
|
my ($fset_a,
|
|
$fset_b,
|
|
$files_added,
|
|
$files_removed,
|
|
$file_pairs,
|
|
$counter_type,
|
|
$language_hash,
|
|
$rh_Ignored) = @_;
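# Compare one pair of file sets for --diff: added and removed files are
# counted whole, while paired files are diffed line-by-line after
# separating code from comments.  Returns per-file and per-language
# counts plus added/removed/modified/same deltas.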
print "-> count_filesets()\n" if $opt_v > 2;
|
|
my @p_errors = ();
|
|
my %p_alignment = ();
|
|
my %p_ignored = ();
|
|
my %p_rbl = ();
|
|
my %p_rbf = ();
|
|
my %p_dbl = ();
|
|
my %p_dbf = ();
|
|
my %Language = %$language_hash;
|
|
|
|
my $nCounted = 0;
|
|
|
|
my %already_counted = (); # already_counted{ filename } = 1
|
|
|
|
if (!@$file_pairs) {
|
|
# Special case where all files were either added or deleted.
|
|
# In this case, one of these arrays will be empty:
|
|
# @files_added, @files_removed
|
|
# so loop over both to cover both cases.
|
|
my $status = @$files_added ? 'added' : 'removed';
|
|
my $fset = @$files_added ? $fset_b : $fset_a;
|
|
foreach my $file (@$files_added, @$files_removed) {
|
|
next unless defined $Language{$fset}{$file};
|
|
my $Lang = $Language{$fset}{$file};
|
|
next if $Lang eq '(unknown)';
|
|
my ($all_line_count,
|
|
$blank_count ,
|
|
$comment_count ,
|
|
) = call_counter($file, $Lang, \@p_errors);
|
|
$already_counted{$file} = 1;
|
|
my $code_count = $all_line_count-$blank_count-$comment_count;
|
|
if ($opt_by_file) {
|
|
$p_dbf{$file}{'code' }{$status} += $code_count ;
|
|
$p_dbf{$file}{'blank' }{$status} += $blank_count ;
|
|
$p_dbf{$file}{'comment'}{$status} += $comment_count;
|
|
$p_dbf{$file}{'lang' }{$status} = $Lang ;
|
|
$p_dbf{$file}{'nFiles' }{$status} += 1 ;
|
|
}
|
|
$p_dbl{$Lang}{'code' }{$status} += $code_count ;
|
|
$p_dbl{$Lang}{'blank' }{$status} += $blank_count ;
|
|
$p_dbl{$Lang}{'comment'}{$status} += $comment_count;
|
|
$p_dbl{$Lang}{'nFiles' }{$status} += 1 ;
|
|
}
|
|
}
|
|
|
|
#use Data::Dumper::Simple;
|
|
#use Data::Dumper;
|
|
#print Dumper(\@files_added, \@files_removed, \@file_pairs);
|
|
#print "after align_by_pairs:\n";
|
|
#print "added:\n";
|
|
|
|
foreach my $f (@$files_added) {
|
|
next if $already_counted{$f};
|
|
#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f};
|
|
# Don't proceed unless the file (both L and R versions)
|
|
# is in a known language.
|
|
next if $opt_include_ext
|
|
and not $Include_Ext{ file_extension($f) };
|
|
if (!defined $Language{$fset_b}{$f}) {
|
|
$p_ignored{$f} = "excluded or unknown language";
|
|
next;
|
|
}
|
|
next if $opt_include_lang
|
|
and not $Include_Language{lc($Language{$fset_b}{$f})};
|
|
my $this_lang = $Language{$fset_b}{$f};
|
|
if (!defined $Language{$fset_b}{$f}) {
|
|
# shouldn't happen but could get here if using
|
|
# --diff-list-file which bypasses earlier checks
|
|
$p_ignored{$f} = "empty or uncharacterizeable file";
|
|
next;
|
|
}
|
|
if ($this_lang eq "(unknown)") {
|
|
$p_ignored{$f} = "unknown language";
|
|
next;
|
|
}
|
|
if ($Exclude_Language{$this_lang}) {
|
|
$p_ignored{$f} = "--exclude-lang=$this_lang";
|
|
next;
|
|
}
|
|
$p_alignment{"added"}{sprintf " + %s ; %s\n", $f, $this_lang} = 1;
|
|
++$p_dbl{ $this_lang }{'nFiles'}{'added'};
|
|
# Additionally, add contents of file $f to
|
|
# Delta_by_File{$f}{comment/blank/code}{'added'}
|
|
# Delta_by_Language{$lang}{comment/blank/code}{'added'}
|
|
# via the $p_dbl and $p_dbf variables.
|
|
my ($all_line_count,
|
|
$blank_count ,
|
|
$comment_count ,
|
|
) = call_counter($f, $this_lang, \@p_errors);
|
|
$p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
|
|
$p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count;
|
|
$p_dbl{ $this_lang }{'code'}{'added'} +=
|
|
$all_line_count - $blank_count - $comment_count;
|
|
$p_dbf{ $f }{'comment'}{'added'} = $comment_count;
|
|
$p_dbf{ $f }{'blank'}{'added'} = $blank_count;
|
|
$p_dbf{ $f }{'code'}{'added'} =
|
|
$all_line_count - $blank_count - $comment_count;
|
|
}
|
|
|
|
#print "removed:\n";
|
|
foreach my $f (@$files_removed) {
|
|
next if $already_counted{$f};
|
|
# Don't proceed unless the file (both L and R versions)
|
|
# is in a known language.
|
|
next if $opt_include_ext
|
|
and not $Include_Ext{ file_extension($f) };
|
|
next if $opt_include_lang
|
|
and (not defined $Language{$fset_a}{$f}
|
|
or not defined $Include_Language{lc($Language{$fset_a}{$f})});
|
|
my $this_lang = $Language{$fset_a}{$f};
|
|
if ((not defined $this_lang) or ($this_lang eq "(unknown)")) {
|
|
$p_ignored{$f} = "unknown language";
|
|
next;
|
|
}
|
|
if ($Exclude_Language{$this_lang}) {
|
|
$p_ignored{$f} = "--exclude-lang=$this_lang";
|
|
next;
|
|
}
|
|
++$p_dbl{ $this_lang }{'nFiles'}{'removed'};
|
|
$p_alignment{"removed"}{sprintf " - %s ; %s\n", $f, $this_lang} = 1;
|
|
#printf "%10s -> %s\n", $f, $Language{$fh[$F ]}{$f};
|
|
# Additionally, add contents of file $f to
|
|
# Delta_by_File{$f}{comment/blank/code}{'removed'}
|
|
# Delta_by_Language{$lang}{comment/blank/code}{'removed'}
|
|
# via the $p_dbl and $p_dbf variables.
|
|
my ($all_line_count,
|
|
$blank_count ,
|
|
$comment_count ,
|
|
) = call_counter($f, $this_lang, \@p_errors);
|
|
$p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
|
|
$p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count;
|
|
$p_dbl{ $this_lang}{'code'}{'removed'} +=
|
|
$all_line_count - $blank_count - $comment_count;
|
|
$p_dbf{ $f }{'comment'}{'removed'} = $comment_count;
|
|
$p_dbf{ $f }{'blank'}{'removed'} = $blank_count;
|
|
$p_dbf{ $f }{'code'}{'removed'} =
|
|
$all_line_count - $blank_count - $comment_count;
|
|
}
|
|
|
|
my $n_file_pairs_compared = 0;
|
|
# Don't know ahead of time how many file pairs will be compared
|
|
# since duplicates are weeded out below. The answer is
|
|
# scalar @file_pairs only if there are no duplicates.
|
|
|
|
foreach my $pair (@$file_pairs) {
|
|
my $file_L = $pair->[0];
|
|
my $file_R = $pair->[1];
|
|
my $Lang_L = $Language{$fset_a}{$file_L};
|
|
my $Lang_R = $Language{$fset_b}{$file_R};
|
|
if (!defined($Lang_L) or !defined($Lang_R)) {
|
|
print " -> count_filesets skipping $file_L, $file_R ",
|
|
"because language cannot be inferred\n" if $opt_v;
|
|
next;
|
|
}
|
|
#print "main step 6 file_L=$file_L file_R=$file_R\n";
|
|
++$nCounted;
|
|
printf "Counting: %d\r", $nCounted
|
|
unless ($counter_type or !$opt_progress_rate or ($nCounted % $opt_progress_rate));
|
|
next if $p_ignored{$file_L} or $p_ignored{$file_R};
|
|
|
|
# filter out non-included extensions
|
|
if ($opt_include_ext and not $Include_Ext{ file_extension($file_L) }
|
|
and not $Include_Ext{ file_extension($file_R) }) {
|
|
$p_ignored{$file_L} = "not in --include-ext=$opt_include_ext";
|
|
$p_ignored{$file_R} = "not in --include-ext=$opt_include_ext";
|
|
next;
|
|
}
|
|
# filter out non-included languages
|
|
if ($opt_include_lang and not $Include_Language{lc($Lang_L)}
|
|
and not $Include_Language{lc($Lang_R)}) {
|
|
$p_ignored{$file_L} = "not in --include-lang=$opt_include_lang";
|
|
$p_ignored{$file_R} = "not in --include-lang=$opt_include_lang";
|
|
next;
|
|
}
|
|
# filter out excluded or unrecognized languages
|
|
if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) {
|
|
$p_ignored{$file_L} = "--exclude-lang=$Lang_L";
|
|
$p_ignored{$file_R} = "--exclude-lang=$Lang_R";
|
|
next;
|
|
}
|
|
|
|
my $not_Filters_by_Language_Lang_LR = 0;
|
|
#print "file_LR = [$file_L] [$file_R]\n";
|
|
#print "Lang_LR = [$Lang_L] [$Lang_R]\n";
|
|
if (($Lang_L eq "(unknown)") or
|
|
($Lang_R eq "(unknown)") or
|
|
!(@{$Filters_by_Language{$Lang_L} }) or
|
|
!(@{$Filters_by_Language{$Lang_R} })) {
|
|
$not_Filters_by_Language_Lang_LR = 1;
|
|
}
|
|
if ($not_Filters_by_Language_Lang_LR) {
|
|
if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) {
|
|
$p_ignored{$fset_a}{$file_L} = "language unknown (#1)";
|
|
$p_ignored{$fset_b}{$file_R} = "language unknown (#1)";
|
|
} else {
|
|
$p_ignored{$fset_a}{$file_L} = "missing Filters_by_Language{$Lang_L}";
|
|
$p_ignored{$fset_b}{$file_R} = "missing Filters_by_Language{$Lang_R}";
|
|
}
|
|
next;
|
|
}
|
|
|
|
# filter out explicitly excluded files
|
|
if ($opt_exclude_list_file and
|
|
($rh_Ignored->{$file_L} or $rh_Ignored->{$file_R})) {
|
|
my $msg_2;
|
|
if ($rh_Ignored->{$file_L}) {
|
|
$msg_2 = "$file_L (paired to $file_R)";
|
|
} else {
|
|
$msg_2 = "$file_R (paired to $file_L)";
|
|
}
|
|
my $msg_1 = "in --exclude-list-file=$opt_exclude_list_file";
|
|
$p_ignored{$file_L} = "$msg_1, $msg_2";
|
|
$p_ignored{$file_R} = "$msg_1, $msg_2";
|
|
next;
|
|
}
|
|
|
|
#print "DIFF($file_L, $file_R)\n";
|
|
# step 0: compare the two files' contents
|
|
chomp ( my @lines_L = read_file($file_L) );
|
|
chomp ( my @lines_R = read_file($file_R) );
|
|
my $language_file_L = "";
|
|
if (defined $Language{$fset_a}{$file_L}) {
|
|
$language_file_L = $Language{$fset_a}{$file_L};
|
|
} else {
|
|
# files $file_L and $file_R do not contain known language
|
|
next;
|
|
}
|
|
|
|
my $contents_are_same = 1;
|
|
if (scalar @lines_L == scalar @lines_R) {
|
|
# same size, must compare line-by-line
|
|
for (my $i = 0; $i < scalar @lines_L; $i++) {
|
|
if ($lines_L[$i] ne $lines_R[$i]) {
|
|
$contents_are_same = 0;
|
|
last;
|
|
}
|
|
}
|
|
if ($contents_are_same) {
|
|
++$p_dbl{$language_file_L}{'nFiles'}{'same'};
|
|
} else {
|
|
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
|
|
}
|
|
} else {
|
|
$contents_are_same = 0;
|
|
# different sizes, contents have changed
|
|
++$p_dbl{$language_file_L}{'nFiles'}{'modified'};
|
|
}
|
|
|
|
if ($opt_diff_alignment) {
|
|
my $str = "$file_L | $file_R ; $language_file_L";
|
|
if ($contents_are_same) {
|
|
$p_alignment{"pairs"}{" == $str"} = 1;
|
|
} else {
|
|
$p_alignment{"pairs"}{" != $str"} = 1;
|
|
}
|
|
++$n_file_pairs_compared;
|
|
}
|
|
|
|
my ($all_line_count_L, $blank_count_L , $comment_count_L ,
|
|
$all_line_count_R, $blank_count_R , $comment_count_R , ) = (0,0,0,0,0,0,);
|
|
if (!$contents_are_same) {
|
|
# step 1: identify comments in both files
|
|
#print "Diff blank removal L language= $Lang_L";
|
|
#print " scalar(lines_L)=", scalar @lines_L, "\n";
|
|
my @original_minus_blanks_L
|
|
= rm_blanks( \@lines_L, $Lang_L, \%EOL_Continuation_re);
|
|
#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n";
|
|
@lines_L = @original_minus_blanks_L;
|
|
#print "2: scalar(lines_L)=", scalar @lines_L, "\n";
|
|
@lines_L = add_newlines(\@lines_L); # compensate for rm_comments()
|
|
@lines_L = rm_comments( \@lines_L, $Lang_L, $file_L,
|
|
\%EOL_Continuation_re);
|
|
#print "3: scalar(lines_L)=", scalar @lines_L, "\n";
|
|
|
|
#print "Diff blank removal R language= $Lang_R\n";
|
|
my @original_minus_blanks_R
|
|
= rm_blanks( \@lines_R, $Lang_R, \%EOL_Continuation_re);
|
|
@lines_R = @original_minus_blanks_R;
|
|
@lines_R = add_newlines(\@lines_R); # taken away by rm_comments()
|
|
@lines_R = rm_comments( \@lines_R, $Lang_R, $file_R,
|
|
\%EOL_Continuation_re);
|
|
|
|
my (@diff_LL, @diff_LR, );
|
|
array_diff( $file_L , # in
|
|
\@original_minus_blanks_L , # in
|
|
\@lines_L , # in
|
|
"comment" , # in
|
|
\@diff_LL, \@diff_LR , # out
|
|
\@p_errors); # in/out
|
|
|
|
my (@diff_RL, @diff_RR, );
|
|
array_diff( $file_R , # in
|
|
\@original_minus_blanks_R , # in
|
|
\@lines_R , # in
|
|
"comment" , # in
|
|
\@diff_RL, \@diff_RR , # out
|
|
\@p_errors); # in/out
|
|
# each line of each file is now classified as
|
|
# code or comment
|
|
#use Data::Dumper;
|
|
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
|
|
#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, );
|
|
#die;
|
|
|
|
# step 2: separate code from comments for L and R files
|
|
my @code_L = ();
|
|
my @code_R = ();
|
|
my @comm_L = ();
|
|
my @comm_R = ();
|
|
foreach my $line_info (@diff_LL) {
|
|
if ($line_info->{'type'} eq "code" ) {
|
|
push @code_L, $line_info->{char};
|
|
} elsif ($line_info->{'type'} eq "comment") {
|
|
push @comm_L, $line_info->{char};
|
|
} else {
|
|
die "Diff unexpected line type ",
|
|
$line_info->{'type'}, "for $file_L line ",
|
|
$line_info->{'lnum'};
|
|
}
|
|
}
|
|
|
|
foreach my $line_info (@diff_RL) {
|
|
if ($line_info->{type} eq "code" ) {
|
|
push @code_R, $line_info->{'char'};
|
|
} elsif ($line_info->{type} eq "comment") {
|
|
push @comm_R, $line_info->{'char'};
|
|
} else {
|
|
die "Diff unexpected line type ",
|
|
$line_info->{'type'}, "for $file_R line ",
|
|
$line_info->{'lnum'};
|
|
}
|
|
}
|
|
|
|
if ($opt_ignore_whitespace) {
|
|
# strip all whitespace from each line of source code
|
|
# and comments then use these stripped arrays in diffs
|
|
foreach (@code_L) { s/\s+//g }
|
|
foreach (@code_R) { s/\s+//g }
|
|
foreach (@comm_L) { s/\s+//g }
|
|
foreach (@comm_R) { s/\s+//g }
|
|
}
|
|
if ($opt_ignore_case) {
|
|
# change all text to lowercase in diffs
|
|
foreach (@code_L) { $_ = lc }
|
|
foreach (@code_R) { $_ = lc }
|
|
foreach (@comm_L) { $_ = lc }
|
|
foreach (@comm_R) { $_ = lc }
|
|
}
|
|
# step 3: compute code diffs
|
|
array_diff("$file_L v. $file_R" , # in
|
|
\@code_L , # in
|
|
\@code_R , # in
|
|
"revision" , # in
|
|
\@diff_LL, \@diff_LR , # out
|
|
\@p_errors); # in/out
|
|
#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, );
|
|
#print Dumper("diff_LR", \@diff_LR);
|
|
foreach my $line_info (@diff_LR) {
|
|
my $status = $line_info->{'desc'}; # same|added|removed|modified
|
|
++$p_dbl{$Lang_L}{'code'}{$status};
|
|
if ($opt_by_file) {
|
|
++$p_dbf{$file_L}{'code'}{$status};
|
|
}
|
|
}
|
|
#use Data::Dumper;
|
|
#print Dumper("code diffs:", \@diff_LL, \@diff_LR);
|
|
|
|
# step 4: compute comment diffs
|
|
array_diff("$file_L v. $file_R" , # in
|
|
\@comm_L , # in
|
|
\@comm_R , # in
|
|
"revision" , # in
|
|
\@diff_LL, \@diff_LR , # out
|
|
\@Errors); # in/out
|
|
#print Dumper("comment diff_LR", \@diff_LR);
|
|
foreach my $line_info (@diff_LR) {
|
|
my $status = $line_info->{'desc'}; # same|added|removed|modified
|
|
++$p_dbl{$Lang_L}{'comment'}{$status};
|
|
if ($opt_by_file) {
|
|
++$p_dbf{$file_L}{'comment'}{$status};
|
|
}
|
|
}
|
|
#print Dumper("comment diffs:", \@diff_LL, \@diff_LR);
|
|
|
|
# step 5: compute difference in blank lines (kind of pointless)
|
|
next if $Lang_L eq '(unknown)' or
|
|
$Lang_R eq '(unknown)';
|
|
($all_line_count_L,
|
|
$blank_count_L ,
|
|
$comment_count_L ,
|
|
) = call_counter($file_L, $Lang_L, \@Errors);
|
|
|
|
($all_line_count_R,
|
|
$blank_count_R ,
|
|
$comment_count_R ,
|
|
) = call_counter($file_R, $Lang_R, \@Errors);
|
|
} else {
|
|
# L and R file contents are identical, no need to diff
|
|
($all_line_count_L,
|
|
$blank_count_L ,
|
|
$comment_count_L ,
|
|
) = call_counter($file_L, $Lang_L, \@Errors);
|
|
$all_line_count_R = $all_line_count_L;
|
|
$blank_count_R = $blank_count_L ;
|
|
$comment_count_R = $comment_count_L ;
|
|
my $code_lines_R = $all_line_count_R - ($blank_count_R + $comment_count_R);
|
|
$p_dbl{$Lang_L}{'blank'}{'same'} += $blank_count_R;
|
|
$p_dbl{$Lang_L}{'comment'}{'same'} += $comment_count_R;
|
|
$p_dbl{$Lang_L}{'code'}{'same'} += $code_lines_R;
|
|
if ($opt_by_file) {
|
|
$p_dbf{$file_L}{'blank'}{'same'} += $blank_count_R;
|
|
$p_dbf{$file_L}{'comment'}{'same'} += $comment_count_R;
|
|
$p_dbf{$file_L}{'code'}{'same'} += $code_lines_R;
|
|
}
|
|
}
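# Blank lines were stripped before diffing, so attribute the difference
# in blank-line counts between the two files wholesale as added or
# removed.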
if ($blank_count_L < $blank_count_R) {
|
|
my $D = $blank_count_R - $blank_count_L;
|
|
$p_dbl{$Lang_L}{'blank'}{'added'} += $D;
|
|
} else {
|
|
my $D = $blank_count_L - $blank_count_R;
|
|
$p_dbl{$Lang_L}{'blank'}{'removed'} += $D;
|
|
}
|
|
if ($opt_by_file) {
|
|
if ($blank_count_L < $blank_count_R) {
|
|
my $D = $blank_count_R - $blank_count_L;
|
|
$p_dbf{$file_L}{'blank'}{'added'} += $D;
|
|
} else {
|
|
my $D = $blank_count_L - $blank_count_R;
|
|
$p_dbf{$file_L}{'blank'}{'removed'} += $D;
|
|
}
|
|
}
|
|
|
|
my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L;
|
|
if ($opt_by_file) {
|
|
$p_rbf{$file_L}{'code' } = $code_count_L ;
|
|
$p_rbf{$file_L}{'blank' } = $blank_count_L ;
|
|
$p_rbf{$file_L}{'comment'} = $comment_count_L ;
|
|
$p_rbf{$file_L}{'lang' } = $Lang_L ;
|
|
$p_rbf{$file_L}{'nFiles' } = 1 ;
|
|
} else {
|
|
$p_rbf{$file_L} = 1; # just keep track of counted files
|
|
}
|
|
|
|
$p_rbl{$Lang_L}{'nFiles'}++;
|
|
$p_rbl{$Lang_L}{'code'} += $code_count_L ;
|
|
$p_rbl{$Lang_L}{'blank'} += $blank_count_L ;
|
|
$p_rbl{$Lang_L}{'comment'} += $comment_count_L;
|
|
}
|
|
|
|
print "<- count_filesets()\n" if $opt_v > 2;
|
|
return {
|
|
"ignored" => \%p_ignored,
|
|
"errors" => \@p_errors,
|
|
"results_by_file" => \%p_rbf,
|
|
"results_by_language" => \%p_rbl,
|
|
"delta_by_file" => \%p_dbf,
|
|
"delta_by_language" => \%p_dbl,
|
|
"alignment" => \%p_alignment,
|
|
"n_filepairs_compared" => $n_file_pairs_compared
|
|
}
|
|
} # 1}}}
|
|
sub write_alignment_data { # {{{1
|
|
my ($filename, $n_filepairs_compared, $data ) = @_;
|
|
my @output = ();
|
|
if ( $data->{'added'} ) {
|
|
my %added_lines = %{$data->{'added'}};
|
|
push (@output, "Files added: " . (scalar keys %added_lines) . "\n");
|
|
foreach my $line ( sort keys %added_lines ) {
|
|
push (@output, $line);
|
|
}
|
|
push (@output, "\n" );
|
|
}
|
|
if ( $data->{'removed'} ) {
|
|
my %removed_lines = %{$data->{'removed'}};
|
|
push (@output, "Files removed: " . (scalar keys %removed_lines) . "\n");
|
|
foreach my $line ( sort keys %removed_lines ) {
|
|
push (@output, $line);
|
|
}
|
|
push (@output, "\n");
|
|
}
|
|
if ( $data->{'pairs'} ) {
|
|
my %pairs = %{$data->{'pairs'}};
|
|
push (@output, "File pairs compared: " . $n_filepairs_compared . "\n");
|
|
foreach my $pair ( sort keys %pairs ) {
|
|
push (@output, $pair);
|
|
}
|
|
}
|
|
write_file($filename, {}, @output);
|
|
} # 1}}}
|
|
sub exclude_dir_validates { # {{{1
|
|
my ($rh_Exclude_Dir) = @_;
|
|
my $is_OK = 1;
|
|
foreach my $dir (keys %{$rh_Exclude_Dir}) {
|
|
if (($ON_WINDOWS and $dir =~ m{\\}) or ($dir =~ m{/})) {
|
|
$is_OK = 0;
|
|
warn "--exclude-dir '$dir' : cannot specify directory paths\n";
|
|
}
|
|
}
|
|
if (!$is_OK) {
|
|
warn "Use '--fullpath --not-match-d=REGEX' instead\n";
|
|
}
|
|
return $is_OK;
|
|
} # 1}}}
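
# Hedged illustration of what the check above accepts and rejects
# (directory names are made up):
#
#     cloc --exclude-dir=test,build  .                  # OK: bare names only
#     cloc --exclude-dir=src/test    .                  # rejected: contains a path
#     cloc --fullpath --not-match-d='/src/test/'  .     # suggested alternative
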
sub process_exclude_list_file { # {{{1
|
|
my ($list_file , # in
|
|
$rh_exclude_dir , # out
|
|
$rh_ignored , # out
|
|
) = @_;
|
|
# note: references global @file_list
|
|
print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2;
|
|
# reject a specific set of files and/or directories
|
|
my @reject_list = ($list_file); # don't count the exclude list file itself
|
|
push @reject_list, read_list_file($list_file);
|
|
my @file_reject_list = ();
|
|
foreach my $F_or_D (@reject_list) {
|
|
if (is_dir($F_or_D)) {
|
|
$rh_exclude_dir->{$F_or_D} = 1;
|
|
} elsif (is_file($F_or_D)) {
|
|
push @file_reject_list, $F_or_D;
|
|
}
|
|
}
|
|
|
|
# Normalize file names for better comparison.
|
|
my %normalized_input = normalize_file_names(@file_list);
|
|
my %normalized_reject = normalize_file_names(@file_reject_list);
|
|
my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir});
|
|
foreach my $F (keys %normalized_input) {
|
|
if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) {
|
|
my $orig_F = $normalized_input{$F};
|
|
$rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file";
|
|
print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n"
|
|
if $opt_v > 1;
|
|
}
|
|
}
|
|
|
|
print "<- process_exclude_list_file\n" if $opt_v > 2;
|
|
} # 1}}}
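
# Hedged example of an exclusion list consumed by the routine above
# (file names are illustrative).  Each line is a path; entries that are
# directories populate $rh_exclude_dir, plain files go on the reject list:
#
#     build
#     third_party/vendor
#     src/generated_parser.c
#
# and a matching invocation might be
#
#     cloc --exclude-list-file=ignore.txt  .
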
sub combine_results { # {{{1
|
|
# returns 1 if the inputs are categorized by language
|
|
# 0 if no identifiable language was found
|
|
my ($ra_report_files, # in
|
|
$report_type , # in "by language" or "by report file"
|
|
$rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
$rhaa_Filters_by_Language , # in
|
|
) = @_;
|
|
|
|
print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2;
|
|
my $found_language = 0;
|
|
|
|
foreach my $file (@{$ra_report_files}) {
|
|
my $n_results_found = 0;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
warn "Unable to read $file; ignoring.\n";
|
|
next;
|
|
}
|
|
while (<$IN>) {
|
|
next if /^(http|Language|SUM|-----)/;
|
|
if (!$opt_by_file and
|
|
m{^(.*?)\s+ # language
|
|
(\d+)\s+ # files
|
|
(\d+)\s+ # blank
|
|
(\d+)\s+ # comments
|
|
(\d+)\s+ # code
|
|
( # next four entries missing with -no3
|
|
x\s+ # x
|
|
\d+\.\d+\s+ # scale
|
|
=\s+ # =
|
|
(\d+\.\d+)\s* # scaled code
|
|
)?
|
|
$}x) {
|
|
if ($report_type eq "by language") {
|
|
if (!defined $rhaa_Filters_by_Language->{$1}) {
|
|
warn "Unrecognized language '$1' in $file ignored\n";
|
|
next;
|
|
}
|
|
# above test necessary to avoid trying to sum reports
|
|
# of reports (which have no language breakdown).
|
|
$found_language = 1;
|
|
$rhh_count->{$1 }{'nFiles' } += $2;
|
|
$rhh_count->{$1 }{'blank' } += $3;
|
|
$rhh_count->{$1 }{'comment'} += $4;
|
|
$rhh_count->{$1 }{'code' } += $5;
|
|
$rhh_count->{$1 }{'scaled' } += $7 if $opt_3;
|
|
} else {
|
|
$rhh_count->{$file}{'nFiles' } += $2;
|
|
$rhh_count->{$file}{'blank' } += $3;
|
|
$rhh_count->{$file}{'comment'} += $4;
|
|
$rhh_count->{$file}{'code' } += $5;
|
|
$rhh_count->{$file}{'scaled' } += $7 if $opt_3;
|
|
}
|
|
++$n_results_found;
|
|
} elsif ($opt_by_file and
|
|
m{^(.*?)\s+ # language
|
|
(\d+)\s+ # blank
|
|
(\d+)\s+ # comments
|
|
(\d+)\s+ # code
|
|
( # next four entries missing with -no3
|
|
x\s+ # x
|
|
\d+\.\d+\s+ # scale
|
|
=\s+ # =
|
|
(\d+\.\d+)\s* # scaled code
|
|
)?
|
|
$}x) {
|
|
if ($report_type eq "by language") {
|
|
next unless %{$rhaa_Filters_by_Language->{$1}};
|
|
# above test necessary to avoid trying to sum reports
|
|
# of reports (which have no language breakdown).
|
|
$found_language = 1;
|
|
$rhh_count->{$1 }{'nFiles' } += 1;
|
|
$rhh_count->{$1 }{'blank' } += $2;
|
|
$rhh_count->{$1 }{'comment'} += $3;
|
|
$rhh_count->{$1 }{'code' } += $4;
|
|
$rhh_count->{$1 }{'scaled' } += $6 if $opt_3;
|
|
} else {
|
|
$rhh_count->{$file}{'nFiles' } += 1;
|
|
$rhh_count->{$file}{'blank' } += $2;
|
|
$rhh_count->{$file}{'comment'} += $3;
|
|
$rhh_count->{$file}{'code' } += $4;
|
|
$rhh_count->{$file}{'scaled' } += $6 if $opt_3;
|
|
}
|
|
++$n_results_found;
|
|
}
|
|
}
|
|
warn "No counts found in $file--is the file format correct?\n"
|
|
unless $n_results_found;
|
|
}
|
|
print "<- combine_results\n" if $opt_v > 2;
|
|
return $found_language;
|
|
} # 1}}}
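
# Hedged example of a plain-text "by language" report line that the first
# regular expression above is meant to match (numbers are invented; the
# "x scale = scaled" tail only appears when the report was made with --3):
#
#     C++        12       345       678      9012 x   1.51 =    13608.12
#               files    blank   comment     code     scale      scaled
#
# Here 13608.12 is simply 9012 * 1.51, the scaled third-generation
# equivalent summed into {'scaled'} when $opt_3 is set.
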
sub compute_denominator { # {{{1
|
|
my ($method, $nCode, $nComment, $nBlank, ) = @_;
|
|
print "-> compute_denominator\n" if $opt_v > 2;
|
|
my %den = ( "c" => $nCode );
|
|
$den{"cm"} = $den{"c"} + $nComment;
|
|
$den{"cmb"} = $den{"cm"} + $nBlank;
|
|
$den{"cb"} = $den{"c"} + $nBlank;
|
|
|
|
print "<- compute_denominator\n" if $opt_v > 2;
|
|
return $den{ $method };
|
|
} # 1}}}
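
# Worked example (comment only) for the four --by-percent methods handled
# above, using nCode=80, nComment=15, nBlank=5:
#
#     c    -> 80     (code)
#     cm   -> 95     (code + comments)
#     cmb  -> 100    (code + comments + blanks)
#     cb   -> 85     (code + blanks)
#
# so with method "cm" a comment count of 15 is reported as
# 15/95*100 = 15.79 percent.
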
sub yaml_to_json_separators { # {{{1
|
|
# YAML and JSON are closely related. Their differences can be captured
|
|
# by trailing commas ($C), braces ($open_B, $close_B), and
|
|
# quotes around text ($Q).
|
|
print "-> yaml_to_json_separators()\n" if $opt_v > 2;
|
|
my ($Q, $open_B, $close_B, $start, $C);
|
|
if ($opt_json) {
|
|
$C = ',';
|
|
$Q = '"';
|
|
$open_B = '{';
|
|
$close_B = '}';
|
|
$start = '{';
|
|
} else {
|
|
$C = '';
|
|
$Q = '' ;
|
|
$open_B = '' ;
|
|
$close_B = '';
|
|
$start = "---\n# $URL\n";
|
|
}
|
|
print "<- yaml_to_json_separators()\n" if $opt_v > 2;
|
|
return ($Q, $open_B, $close_B, $start, $C);
|
|
} # 1}}}
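
# Hedged sketch of the effect of the separators returned above.  The same
# header record is emitted roughly as
#
#     YAML:   ---
#             # github.com/AlDanial/cloc
#             header :
#               cloc_url : github.com/AlDanial/cloc
#
#     JSON:   {"header" : {
#               "cloc_url" : "github.com/AlDanial/cloc",
#
# i.e. JSON adds quotes ($Q), braces ($open_B/$close_B), and trailing
# commas ($C), while YAML replaces them all with empty strings.
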
sub diff_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
print "-> diff_report\n" if $opt_v > 2;
|
|
|
|
if ($opt_xml) {
|
|
print "<- diff_report\n" if $opt_v > 2;
|
|
return diff_xml_report(@_)
|
|
} elsif ($opt_yaml) {
|
|
print "<- diff_report\n" if $opt_v > 2;
|
|
return diff_yaml_report(@_)
|
|
} elsif ($opt_json) {
|
|
print "<- diff_report\n" if $opt_v > 2;
|
|
return diff_json_report(@_)
|
|
} elsif ($opt_csv or $opt_md) {
|
|
print "<- diff_report\n" if $opt_v > 2;
|
|
return diff_csv_report(@_)
|
|
}
|
|
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rh_scale , # in
|
|
) = @_;
|
|
my %orig_case = ();
|
|
if ($ON_WINDOWS and $report_type eq "by file") {
|
|
# restore the original upper/lowercase version of the file name
|
|
foreach my $lc_file (sort keys %{$rhhh_count}) {
|
|
foreach my $cat (sort keys %{$rhhh_count->{$lc_file}}) {
|
|
foreach my $S (qw(added same modified removed)) {
|
|
$orig_case{ $upper_lower_map{$lc_file} }{$cat}{$S} =
|
|
$rhhh_count->{$lc_file}{$cat}{$S};
|
|
}
|
|
}
|
|
}
|
|
$rhhh_count = \%orig_case;
|
|
}
|
|
|
|
#use Data::Dumper;
|
|
#print "diff_report: ", Dumper($rhhh_count), "\n";
|
|
my @results = ();
|
|
|
|
my $languages = ();
|
|
my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed}
|
|
my $max_len = 0;
|
|
foreach my $language (keys %{$rhhh_count}) {
|
|
foreach my $V (qw(nFiles blank comment code)) {
|
|
foreach my $S (qw(added same modified removed)) {
|
|
$rhhh_count->{$language}{$V}{$S} = 0 unless
|
|
defined $rhhh_count->{$language}{$V}{$S};
|
|
$sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
|
|
}
|
|
}
|
|
$max_len = length($language) if length($language) > $max_len;
|
|
}
|
|
my $column_1_offset = 0;
|
|
$column_1_offset = $max_len - 17 if $max_len > 17;
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
|
my $spacing_0 = 23;
|
|
my $spacing_1 = 13;
|
|
my $spacing_2 = 9;
|
|
my $spacing_3 = 17;
|
|
if (!$opt_3) {
|
|
$spacing_1 = 19;
|
|
$spacing_2 = 14;
|
|
$spacing_3 = 27;
|
|
}
|
|
$spacing_0 += $column_1_offset;
|
|
$spacing_1 += $column_1_offset;
|
|
$spacing_3 += $column_1_offset;
|
|
my %Format = (
|
|
'1' => { 'xml' => 'name="%s" ',
|
|
'txt' => "\%-${spacing_0}s ",
|
|
},
|
|
'2' => { 'xml' => 'name="%s" ',
|
|
'txt' => "\%-${spacing_3}s ",
|
|
},
|
|
'3' => { 'xml' => 'files_count="%d" ',
|
|
'txt' => '%6d ',
|
|
},
|
|
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
|
|
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
|
|
},
|
|
'5' => { 'xml' => 'blank="%.2f" comment="%.2f" code="%d" ',
|
|
'txt' => "\%3.2f \%3.2f \%${spacing_2}d",
|
|
},
|
|
'6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
|
|
'txt' => ' x %6.2f = %14.2f',
|
|
},
|
|
);
|
|
my $Style = "txt";
|
|
$Style = "xml" if $opt_xml ;
|
|
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
|
|
$Style = "xml" if $opt_json; # not a typo; just set to anything but txt
|
|
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
|
|
|
|
my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
|
|
$hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
|
|
if (!$opt_3) and (68 + $column_1_offset) > 79;
|
|
my $data_line = "";
|
|
my $first_column;
|
|
my $BY_LANGUAGE = 0;
|
|
my $BY_FILE = 0;
|
|
if ($report_type eq "by language") {
|
|
$first_column = "Language";
|
|
$BY_LANGUAGE = 1;
|
|
} elsif ($report_type eq "by file") {
|
|
$first_column = "File";
|
|
$BY_FILE = 1;
|
|
} else {
|
|
$first_column = "Report File";
|
|
}
|
|
|
|
# column headers
|
|
if (!$opt_3 and $BY_FILE) {
|
|
my $spacing_n = $spacing_1 - 11;
|
|
$data_line = sprintf "%-${spacing_n}s" , $first_column;
|
|
} else {
|
|
$data_line = sprintf "%-${spacing_1}s ", $first_column;
|
|
}
|
|
if ($BY_FILE) {
|
|
$data_line .= sprintf "%${spacing_2}s" , "" ;
|
|
} else {
|
|
$data_line .= sprintf "%${spacing_2}s " , "files";
|
|
}
|
|
my $PCT_symbol = "";
|
|
$PCT_symbol = " \%" if $opt_by_percent;
|
|
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
"blank${PCT_symbol}" ,
|
|
"comment${PCT_symbol}" ,
|
|
"code";
|
|
|
|
if ($Style eq "txt") {
|
|
push @results, $data_line;
|
|
push @results, $hyphen_line;
|
|
}
|
|
|
|
# sort diff output in descending order of cumulative entries
|
|
foreach my $lang_or_file (sort {
|
|
($rhhh_count->{$b}{'code'}{'added'} +
|
|
$rhhh_count->{$b}{'code'}{'same'} +
|
|
$rhhh_count->{$b}{'code'}{'modified'} +
|
|
$rhhh_count->{$b}{'code'}{'removed'} ) <=>
|
|
($rhhh_count->{$a}{'code'}{'added'} +
|
|
$rhhh_count->{$a}{'code'}{'same'} +
|
|
$rhhh_count->{$a}{'code'}{'modified'} +
|
|
$rhhh_count->{$a}{'code'}{'removed'})
|
|
or $a cmp $b }
|
|
keys %{$rhhh_count}) {
|
|
|
|
if ($BY_FILE) {
|
|
push @results, rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
} else {
|
|
push @results, $lang_or_file;
|
|
}
|
|
foreach my $S (qw(same modified added removed)) {
|
|
my $indent = $spacing_1 - 2;
|
|
my $line .= sprintf " %-${indent}s", $S;
|
|
if ($BY_FILE) {
|
|
$line .= sprintf " ";
|
|
} else {
|
|
$line .= sprintf " %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S};
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
|
|
$rhhh_count->{$lang_or_file}{'comment'}{$S},
|
|
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
|
|
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
|
|
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
$rhhh_count->{$lang_or_file}{'blank'}{$S} / $DEN * 100,
|
|
$rhhh_count->{$lang_or_file}{'comment'}{$S} / $DEN * 100,
|
|
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
} else {
|
|
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
0.0, 0.0, $rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
}
|
|
} else {
|
|
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
$rhhh_count->{$lang_or_file}{'blank'}{$S} ,
|
|
$rhhh_count->{$lang_or_file}{'comment'}{$S} ,
|
|
$rhhh_count->{$lang_or_file}{'code'}{$S} ;
|
|
}
|
|
push @results, $line;
|
|
}
|
|
}
|
|
push @results, $hyphen_line;
|
|
push @results, "SUM:";
|
|
my $sum_files = 0;
|
|
my $sum_lines = 0;
|
|
foreach my $S (qw(same modified added removed)) {
|
|
my $indent = $spacing_1 - 2;
|
|
my $line .= sprintf " %-${indent}s", $S;
|
|
if ($BY_FILE) {
|
|
$line .= sprintf " ";
|
|
$sum_files += 1;
|
|
} else {
|
|
$line .= sprintf " %${spacing_2}s", $sum{'nFiles'}{$S};
|
|
$sum_files += $sum{'nFiles'}{$S};
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent,
|
|
$sum{'code'}{$S}, $sum{'comment'}{$S}, $sum{'blank'}{$S});
|
|
if ($sum{'code'}{$S} > 0) {
|
|
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
$sum{'blank'}{$S} / $DEN * 100,
|
|
$sum{'comment'}{$S} / $DEN * 100,
|
|
$sum{'code'}{$S} ;
|
|
} else {
|
|
$line .= sprintf " %14.2f %14.2f %${spacing_2}s",
|
|
0.0, 0.0, $sum{'code'}{$S} ;
|
|
}
|
|
} else {
|
|
$line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
$sum{'blank'}{$S} ,
|
|
$sum{'comment'}{$S} ,
|
|
$sum{'code'}{$S} ;
|
|
}
|
|
$sum_lines += $sum{'blank'}{$S} + $sum{'comment'}{$S} + $sum{'code'}{$S};
|
|
push @results, $line;
|
|
}
|
|
|
|
my $header_line = sprintf "%s v %s", $URL, $version;
|
|
$header_line .= sprintf(" T=%.2f s (%.1f files/s, %.1f lines/s)",
|
|
$elapsed_sec ,
|
|
$sum_files/$elapsed_sec,
|
|
$sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
|
|
if ($Style eq "txt") {
|
|
unshift @results, output_header($header_line, $hyphen_line, $BY_FILE);
|
|
}
|
|
|
|
push @results, $hyphen_line;
|
|
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
print "<- diff_report\n" if $opt_v > 2;
|
|
|
|
return @results;
|
|
} # 1}}}
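
# Hedged sketch (all values invented) of the plain-text diff table
# assembled above when reporting by language without --3: a header row,
# then a block of four status rows per language, then a SUM block with
# the same four rows.
#
#     Language                    files          blank        comment           code
#     ------------------------------------------------------------------------------
#     Perl
#      same                           0              0             18            401
#      modified                       2              0              1             25
#      added                          0              3              0             12
#      removed                        0              1              0              4
#     ------------------------------------------------------------------------------
#     SUM:
#      same                           0              0             18            401
#      modified                       2              0              1             25
#      added                          0              3              0             12
#      removed                        0              1              0              4
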
sub xml_yaml_or_json_header { # {{{1
|
|
my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines, $by_file) = @_;
|
|
print "-> xml_yaml_or_json_header\n" if $opt_v > 2;
|
|
my $header = "";
|
|
my $file_rate = $sum_files/$elapsed_sec;
|
|
my $line_rate = $sum_lines/$elapsed_sec;
|
|
my $type = "";
|
|
$type = "diff_" if $opt_diff;
|
|
my $report_file = "";
|
|
if ($opt_report_file) {
|
|
my $Fname = $opt_report_file;
|
|
$Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
|
|
if ($opt_sum_reports) {
|
|
if ($by_file) {
|
|
$report_file = " <report_file>$Fname.file</report_file>"
|
|
} else {
|
|
$report_file = " <report_file>$Fname.lang</report_file>"
|
|
}
|
|
} else {
|
|
$report_file = " <report_file>$Fname</report_file>"
|
|
}
|
|
}
|
|
if ($opt_xml) {
|
|
$header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
|
$header .= "\n<?xml-stylesheet type=\"text/xsl\" href=\"" . $opt_xsl . "\"?>" if $opt_xsl;
|
|
if ($opt_hide_rate) {
|
|
$header .= "<${type}results>
|
|
<header>
|
|
<cloc_url>$URL</cloc_url>
|
|
<cloc_version>$version</cloc_version>
|
|
<n_files>$sum_files</n_files>
|
|
<n_lines>$sum_lines</n_lines>";
|
|
} else {
|
|
$header .= "<${type}results>
|
|
<header>
|
|
<cloc_url>$URL</cloc_url>
|
|
<cloc_version>$version</cloc_version>
|
|
<elapsed_seconds>$elapsed_sec</elapsed_seconds>
|
|
<n_files>$sum_files</n_files>
|
|
<n_lines>$sum_lines</n_lines>
|
|
<files_per_second>$file_rate</files_per_second>
|
|
<lines_per_second>$line_rate</lines_per_second>";
|
|
}
|
|
$header .= "\n$report_file"
|
|
if $opt_report_file;
|
|
$header .= "\n</header>";
|
|
if (%git_metadata) {
|
|
foreach my $target (keys %git_metadata) {
|
|
$header .= "\n<source>";
|
|
$header .= "\n <target>$target</target>";
|
|
$header .= "\n <origin>$git_metadata{$target}{'origin'}</origin>";
|
|
$header .= "\n <branch>$git_metadata{$target}{'branch'}</branch>";
|
|
$header .= "\n <commit>$git_metadata{$target}{'commit'}</commit>";
|
|
$header .= "\n</source>";
|
|
}
|
|
}
|
|
} elsif ($opt_yaml or $opt_json) {
|
|
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
|
|
if ($opt_hide_rate) {
|
|
$header = "${start}${Q}header${Q} : $open_B
|
|
${Q}cloc_url${Q} : ${Q}$URL${Q}${C}
|
|
${Q}cloc_version${Q} : ${Q}$version${Q}${C}
|
|
${Q}n_files${Q} : $sum_files${C}
|
|
${Q}n_lines${Q} : $sum_lines";
|
|
} else {
|
|
$header = "${start}${Q}header${Q} : $open_B
|
|
${Q}cloc_url${Q} : ${Q}$URL${Q}${C}
|
|
${Q}cloc_version${Q} : ${Q}$version${Q}${C}
|
|
${Q}elapsed_seconds${Q} : $elapsed_sec${C}
|
|
${Q}n_files${Q} : $sum_files${C}
|
|
${Q}n_lines${Q} : $sum_lines${C}
|
|
${Q}files_per_second${Q} : $file_rate${C}
|
|
${Q}lines_per_second${Q} : $line_rate";
|
|
}
|
|
if ($opt_report_file) {
|
|
my $Fname = $opt_report_file;
|
|
$Fname =~ s{\\}{\\\\}g if $ON_WINDOWS;
|
|
if ($opt_sum_reports) {
|
|
if ($by_file) {
|
|
$header .= "$C\n ${Q}report_file${Q} : ${Q}$Fname.file${Q}"
|
|
} else {
|
|
$header .= "$C\n ${Q}report_file${Q} : ${Q}$Fname.lang${Q}"
|
|
}
|
|
} else {
|
|
$header .= "$C\n ${Q}report_file${Q} : ${Q}$Fname${Q}";
|
|
}
|
|
}
|
|
$header .= "${close_B}${C}";
|
|
}
|
|
print "<- xml_yaml_or_json_header\n" if $opt_v > 2;
|
|
return $header;
|
|
} # 1}}}
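
# Hedged sample of the JSON flavor of the header built above (numbers are
# illustrative; exact whitespace may differ, and the YAML and XML variants
# carry the same fields):
#
#     {"header" : {
#       "cloc_url" : "github.com/AlDanial/cloc",
#       "cloc_version" : "2.01",
#       "elapsed_seconds" : 0.02,
#       "n_files" : 3,
#       "n_lines" : 420,
#       "files_per_second" : 150,
#       "lines_per_second" : 21000},
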
sub diff_yaml_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rh_scale , # in
|
|
) = @_;
|
|
print "-> diff_yaml_report\n" if $opt_v > 2;
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
my @results = ();
|
|
my %sum = ();
|
|
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
|
|
diff_header_sum($report_type, $rhhh_count, \%sum);
|
|
|
|
if (!$ALREADY_SHOWED_HEADER) {
|
|
push @results,
|
|
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
|
|
$sum_files, $sum_lines, $BY_FILE);
|
|
$ALREADY_SHOWED_HEADER = 1;
|
|
}
|
|
foreach my $S (qw(added same modified removed)) {
|
|
push @results, "$S :";
|
|
foreach my $F_or_L (keys %{$rhhh_count}) {
|
|
# force quoted language or filename in case these
|
|
# have embedded funny characters, issue #312
|
|
push @results, " '" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "' :";
|
|
foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
|
|
next if $k eq "lang"; # present only in those cases
|
|
# where code exists for action $S
|
|
$rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
|
|
defined $rhhh_count->{$F_or_L}{$k}{$S};
|
|
push @results,
|
|
" $k : $rhhh_count->{$F_or_L}{$k}{$S}";
|
|
}
|
|
}
|
|
}
|
|
|
|
push @results, "SUM :";
|
|
foreach my $S (qw(added same modified removed)) {
|
|
push @results, " $S :";
|
|
foreach my $topic (keys %sum) {
|
|
push @results, " $topic : $sum{$topic}{$S}";
|
|
}
|
|
}
|
|
|
|
print "<- diff_yaml_report\n" if $opt_v > 2;
|
|
|
|
return @results;
|
|
} # 1}}}
|
|
sub diff_json_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rh_scale , # in
|
|
) = @_;
|
|
print "-> diff_json_report\n" if $opt_v > 2;
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
my @results = ();
|
|
my %sum = ();
|
|
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
|
|
diff_header_sum($report_type, $rhhh_count, \%sum);
|
|
|
|
if (!$ALREADY_SHOWED_HEADER) {
|
|
push @results,
|
|
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
|
|
$sum_files, $sum_lines, $BY_FILE);
|
|
$ALREADY_SHOWED_HEADER = 1;
|
|
}
|
|
foreach my $S (qw(added same modified removed)) {
|
|
push @results, " \"$S\" : {";
|
|
foreach my $F_or_L (keys %{$rhhh_count}) {
|
|
push @results, " \"" . rm_leading_tempdir($F_or_L, \%TEMP_DIR) . "\" : {";
|
|
foreach my $k (keys %{$rhhh_count->{$F_or_L}}) {
|
|
next if $k eq "lang"; # present only in those cases
|
|
# where code exists for action $S
|
|
$rhhh_count->{$F_or_L}{$k}{$S} = 0 unless
|
|
defined $rhhh_count->{$F_or_L}{$k}{$S};
|
|
push @results,
|
|
" \"$k\" : $rhhh_count->{$F_or_L}{$k}{$S},";
|
|
}
|
|
$results[-1] =~ s/,\s*$//;
|
|
push @results, " },"
|
|
}
|
|
$results[-1] =~ s/,\s*$//;
|
|
push @results, " },"
|
|
}
|
|
|
|
push @results, " \"SUM\" : {";
|
|
foreach my $S (qw(added same modified removed)) {
|
|
push @results, " \"$S\" : {";
|
|
foreach my $topic (keys %sum) {
|
|
push @results, " \"$topic\" : $sum{$topic}{$S},";
|
|
}
|
|
$results[-1] =~ s/,\s*$//;
|
|
push @results, "},";
|
|
}
|
|
|
|
$results[-1] =~ s/,\s*$//;
|
|
push @results, "} }";
|
|
print "<- diff_json_report\n" if $opt_v > 2;
|
|
return @results;
|
|
} # 1}}}
|
|
sub diff_header_sum { # {{{1
|
|
my ($report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rhh_sum , # out sum{nFiles|blank|comment|code}{same|modified|added|removed}
|
|
) = @_;
|
|
|
|
my $sum_files = 0;
|
|
my $sum_lines = 0;
|
|
foreach my $language (keys %{$rhhh_count}) {
|
|
foreach my $V (qw(nFiles blank comment code)) {
|
|
foreach my $S (qw(added same modified removed)) {
|
|
$rhhh_count->{$language}{$V}{$S} = 0 unless
|
|
defined $rhhh_count->{$language}{$V}{$S};
|
|
$rhh_sum->{$V}{$S} += $rhhh_count->{$language}{$V}{$S};
|
|
if ($V eq "nFiles") {
|
|
$sum_files += $rhhh_count->{$language}{$V}{$S};
|
|
} else {
|
|
$sum_lines += $rhhh_count->{$language}{$V}{$S};
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
my $BY_LANGUAGE = 0;
|
|
my $BY_FILE = 0;
|
|
if ($report_type eq "by language") {
|
|
$BY_LANGUAGE = 1;
|
|
} elsif ($report_type eq "by file") {
|
|
$BY_FILE = 1;
|
|
}
|
|
return $sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE;
|
|
} # 1}}}
|
|
sub diff_xml_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rh_scale , # in
|
|
) = @_;
|
|
print "-> diff_xml_report\n" if $opt_v > 2;
|
|
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
|
|
|
|
#print "diff_report: ", Dumper($rhhh_count), "\n";
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
my @results = ();
|
|
my %sum = ();
|
|
my $languages = ();
|
|
|
|
my ($sum_lines, $sum_files, $BY_FILE, $BY_LANGUAGE) =
|
|
diff_header_sum($report_type, $rhhh_count, \%sum);
|
|
|
|
my $data_line = "";
|
|
|
|
if (!$ALREADY_SHOWED_HEADER) {
|
|
push @results,
|
|
xml_yaml_or_json_header($URL, $version, $elapsed_sec,
|
|
$sum_files, $sum_lines, $BY_FILE);
|
|
$ALREADY_SHOWED_HEADER = 1;
|
|
}
|
|
|
|
foreach my $S (qw(same modified added removed)) {
|
|
push @results, " <$S>";
|
|
foreach my $lang_or_file (sort {
|
|
$rhhh_count->{$b}{'code'} <=>
|
|
$rhhh_count->{$a}{'code'}
|
|
}
|
|
keys %{$rhhh_count}) {
|
|
my $L = "";
|
|
|
|
if ($BY_FILE) {
|
|
$L .= sprintf " <file name=\"%s\" files_count=\"1\" ",
|
|
xml_metachars(
|
|
rm_leading_tempdir($lang_or_file, \%TEMP_DIR));
|
|
} else {
|
|
$L .= sprintf " <language name=\"%s\" files_count=\"%d\" ",
|
|
$lang_or_file ,
|
|
$rhhh_count->{$lang_or_file}{'nFiles'}{$S};
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$rhhh_count->{$lang_or_file}{'code'}{$S} ,
|
|
$rhhh_count->{$lang_or_file}{'comment'}{$S},
|
|
$rhhh_count->{$lang_or_file}{'blank'}{$S} );
|
|
foreach my $T (qw(blank comment)) {
|
|
if ($rhhh_count->{$lang_or_file}{'code'}{$S} > 0) {
|
|
$L .= sprintf "%s=\"%.2f\" ",
|
|
$T, $rhhh_count->{$lang_or_file}{$T}{$S} / $DEN * 100;
|
|
} else {
|
|
$L .= sprintf "%s=\"0.0\" ", $T;
|
|
}
|
|
}
|
|
foreach my $T (qw(code)) {
|
|
$L .= sprintf "%s=\"%d\" ",
|
|
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
}
|
|
} else {
|
|
foreach my $T (qw(blank comment code)) {
|
|
$L .= sprintf "%s=\"%d\" ",
|
|
$T, $rhhh_count->{$lang_or_file}{$T}{$S};
|
|
}
|
|
}
|
|
push @results, $L . "/>";
|
|
}
|
|
|
|
|
|
my $L = sprintf " <total sum_files=\"%d\" ", $sum{'nFiles'}{$S};
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent,
|
|
$sum{'code'}{$S} ,
|
|
$sum{'comment'}{$S},
|
|
$sum{'blank'}{$S} );
|
|
foreach my $V (qw(blank comment)) {
|
|
if ($sum{'code'}{$S} > 0) {
|
|
$L .= sprintf "%s=\"%.2f\" ", $V, $sum{$V}{$S} / $DEN * 100;
|
|
} else {
|
|
$L .= sprintf "%s=\"0.0\" ", $V;
|
|
}
|
|
}
|
|
foreach my $V (qw(code)) {
|
|
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
|
|
}
|
|
} else {
|
|
foreach my $V (qw(blank comment code)) {
|
|
$L .= sprintf "%s=\"%d\" ", $V, $sum{$V}{$S};
|
|
}
|
|
}
|
|
push @results, $L . "/>";
|
|
push @results, " </$S>";
|
|
}
|
|
|
|
push @results, "</diff_results>";
|
|
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
print "<- diff_xml_report\n" if $opt_v > 2;
|
|
return @results;
|
|
} # 1}}}
|
|
sub diff_csv_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s}
|
|
$rh_scale , # in unused
|
|
) = @_;
|
|
print "-> diff_csv_report\n" if $opt_v > 2;
|
|
|
|
my @results = ();
|
|
my $languages = ();
|
|
|
|
my $data_line = "";
|
|
my $BY_LANGUAGE = 0;
|
|
my $BY_FILE = 0;
|
|
if ($report_type eq "by language") {
|
|
$BY_LANGUAGE = 1;
|
|
} elsif ($report_type eq "by file") {
|
|
$BY_FILE = 1;
|
|
}
|
|
my $DELIM = ",";
|
|
$DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
|
|
$DELIM = "|" if defined $opt_md;
|
|
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
|
my $line = "Language${DELIM} ";
|
|
$line = "File${DELIM} " if $BY_FILE;
|
|
foreach my $item (qw(files blank comment code)) {
|
|
next if $BY_FILE and $item eq 'files';
|
|
foreach my $symbol ( '==', '!=', '+', '-', ) {
|
|
$line .= "$symbol $item${DELIM} ";
|
|
}
|
|
}
|
|
|
|
my $T_elapsed_sec = "T=$elapsed_sec s";
|
|
$T_elapsed_sec = "" if $opt_hide_rate;
|
|
|
|
if ($opt_md) {
|
|
push @results, "cloc|$URL v $version $T_elapsed_sec";
|
|
push @results, "--- | ---";
|
|
push @results, "";
|
|
push @results, $line;
|
|
my @col_header = ();
|
|
push @col_header, ":-------";
|
|
foreach (1..16) {
|
|
push @col_header, "-------:";
|
|
}
|
|
push @results, join("|", @col_header) . "|";
|
|
} else {
|
|
$line .= "\"$URL v $version $T_elapsed_sec\"";
|
|
push @results, $line;
|
|
}
|
|
|
|
foreach my $lang_or_file (keys %{$rhhh_count}) {
|
|
$rhhh_count->{$lang_or_file}{'code'}{'added'} = 0 unless
|
|
defined $rhhh_count->{$lang_or_file}{'code'};
|
|
}
|
|
foreach my $lang_or_file (sort {
|
|
$rhhh_count->{$b}{'code'} <=>
|
|
$rhhh_count->{$a}{'code'}
|
|
}
|
|
keys %{$rhhh_count}) {
|
|
if ($BY_FILE) {
|
|
$line = rm_leading_tempdir($lang_or_file, \%TEMP_DIR) . "$DELIM ";
|
|
} else {
|
|
$line = $lang_or_file . "${DELIM} ";
|
|
}
|
|
if ($opt_by_percent) {
|
|
foreach my $item (qw(nFiles)) {
|
|
next if $BY_FILE and $item eq 'nFiles';
|
|
foreach my $symbol (qw(same modified added removed)) {
|
|
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
} else {
|
|
$line .= "0${DELIM} ";
|
|
}
|
|
}
|
|
}
|
|
foreach my $item (qw(blank comment)) {
|
|
foreach my $symbol (qw(same modified added removed)) {
|
|
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol} and
|
|
defined $rhhh_count->{$lang_or_file}{'code'}{$symbol} and
|
|
$rhhh_count->{$lang_or_file}{'code'}{$symbol} > 0) {
|
|
$line .= sprintf("%.2f", $rhhh_count->{$lang_or_file}{$item}{$symbol} / $rhhh_count->{$lang_or_file}{'code'}{$symbol} * 100).${DELIM};
|
|
} else {
|
|
$line .= "0.00${DELIM} ";
|
|
}
|
|
}
|
|
}
|
|
foreach my $item (qw(code)) {
|
|
foreach my $symbol (qw(same modified added removed)) {
|
|
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
} else {
|
|
$line .= "0${DELIM} ";
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
foreach my $item (qw(nFiles blank comment code)) {
|
|
next if $BY_FILE and $item eq 'nFiles';
|
|
foreach my $symbol (qw(same modified added removed)) {
|
|
if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) {
|
|
$line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}${DELIM} ";
|
|
} else {
|
|
$line .= "0${DELIM} ";
|
|
}
|
|
}
|
|
}
|
|
}
|
|
push @results, $line;
|
|
}
|
|
|
|
print "<- diff_csv_report\n" if $opt_v > 2;
|
|
return @results;
|
|
} # 1}}}
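
# Hedged example of the CSV diff output assembled above, by language with
# the default delimiter.  The symbols mean: == same, != modified,
# + added, - removed.  The header begins
#
#     Language, == files, != files, + files, - files, == blank, != blank, ...
#
# and continues with the same four symbols for comment and code before a
# final quoted version/timing cell.  A data row carries the sixteen counts
# in the same order (numbers invented):
#
#     Perl, 3, 1, 0, 0, 12, 0, 2, 1, 40, 0, 0, 0, 401, 25, 12, 4
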
sub rm_leading_tempdir { # {{{1
|
|
my ($in_file, $rh_temp_dirs, ) = @_;
|
|
my $clean_filename = $in_file;
|
|
foreach my $temp_d (keys %{$rh_temp_dirs}) {
|
|
if ($ON_WINDOWS) {
|
|
# \ -> / necessary to allow the next if test's
|
|
# m{} to work in the presence of spaces in file names
|
|
$temp_d =~ s{\\}{/}g;
|
|
$clean_filename =~ s{\\}{/}g;
|
|
}
|
|
if ($clean_filename =~ m{^$temp_d/}) {
|
|
$clean_filename =~ s{^$temp_d/}{};
|
|
last;
|
|
}
|
|
}
|
|
if ($ON_WINDOWS and $opt_by_file) { # then go back from / to \
|
|
if ($opt_json) {
|
|
$clean_filename =~ s{/}{\\\\}g;
|
|
} else {
|
|
$clean_filename =~ s{/}{\\}g;
|
|
}
|
|
}
|
|
return $clean_filename;
|
|
} # 1}}}
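
# Hedged example of the cleanup performed above (paths are hypothetical):
# if an archive was expanded into the scratch directory /tmp/h29BNWmeGq
# (a key of %TEMP_DIR), then
#
#     rm_leading_tempdir('/tmp/h29BNWmeGq/myproj/src/main.c', \%TEMP_DIR)
#
# returns 'myproj/src/main.c', with / flipped back to \ on Windows when
# reporting by file.
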
sub generate_sql { # {{{1
|
|
my ($elapsed_sec, # in
|
|
$rhh_count , # in count{TYPE}{lang|code|blank|comment|scaled}
|
|
$rh_scale , # in
|
|
) = @_;
|
|
print "-> generate_sql\n" if $opt_v > 2;
|
|
|
|
#print "generate_sql A [$opt_sql_project]\n";
|
|
$opt_sql_project = cwd() unless defined $opt_sql_project;
|
|
$opt_sql_project = '' unless defined $opt_sql_project; # have seen cwd() fail
|
|
#print "generate_sql B [$opt_sql_project]\n";
|
|
$opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS;
|
|
#print "generate_sql C [$opt_sql_project]\n";
|
|
|
|
my $schema = undef;
|
|
if ($opt_sql_style eq "oracle") {
|
|
$schema = "
|
|
CREATE TABLE metadata
|
|
(
|
|
id INTEGER PRIMARY KEY,
|
|
timestamp TIMESTAMP,
|
|
project VARCHAR2(500 CHAR),
|
|
elapsed_s NUMBER(10, 6)
|
|
)
|
|
/
|
|
|
|
CREATE TABLE t
|
|
(
|
|
id INTEGER ,
|
|
project VARCHAR2(500 CHAR),
|
|
language VARCHAR2(500 CHAR),
|
|
file_fullname VARCHAR2(500 CHAR),
|
|
file_dirname VARCHAR2(500 CHAR),
|
|
file_basename VARCHAR2(500 CHAR),
|
|
nblank INTEGER,
|
|
ncomment INTEGER,
|
|
ncode INTEGER,
|
|
nscaled NUMBER(10, 6),
|
|
FOREIGN KEY (id)
|
|
REFERENCES metadata (id)
|
|
)
|
|
/
|
|
|
|
";
|
|
} else {
|
|
$schema = "
|
|
create table metadata ( -- $URL v $VERSION
|
|
id integer primary key,
|
|
timestamp varchar(500),
|
|
Project varchar(500),
|
|
elapsed_s real);
|
|
create table t (
|
|
id integer ,
|
|
Project varchar(500) ,
|
|
Language varchar(500) ,
|
|
File varchar(500) ,
|
|
File_dirname varchar(500) ,
|
|
File_basename varchar(500) ,
|
|
nBlank integer ,
|
|
nComment integer ,
|
|
nCode integer ,
|
|
nScaled real ,
|
|
foreign key (id)
|
|
references metadata (id));
|
|
";
|
|
}
|
|
$opt_sql = "-" if $opt_sql eq "1";
|
|
|
|
my $open_mode = ">";
|
|
$open_mode = ">>" if $opt_sql_append;
|
|
|
|
my $fh;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path and $opt_sql ne "-") {
|
|
# only use the Win32::LongPath wrapper here when needed,
|
|
# and only when not writing to STDOUT.
|
|
$fh = open_file($open_mode, $opt_sql, 1);
|
|
die "Unable to write to $opt_sql\n" if !defined $fh;
|
|
} else {
|
|
$fh = new IO::File; # $opt_sql, "w";
|
|
if (!$fh->open("${open_mode}${opt_sql}")) {
|
|
die "Unable to write to $opt_sql $!\n";
|
|
}
|
|
}
|
|
print $fh $schema unless defined $opt_sql_append;
|
|
|
|
my $id = int(time());
|
|
my $insert_into_t = "insert into t ";
|
|
if ($opt_sql_style eq "oracle") {
|
|
printf $fh "insert into metadata values(%d, TO_TIMESTAMP('%s','yyyy-mm-dd hh24:mi:ss'), '%s', %f);\n",
|
|
$id,
|
|
strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
|
|
$opt_sql_project, $elapsed_sec;
|
|
} elsif ($opt_sql_style eq "named_columns") {
|
|
print $fh "begin transaction;\n";
|
|
$insert_into_t .= "(id, Project, Language, File, File_dirname, File_basename, nBlank, nComment, nCode, nScaled )";
|
|
} else {
|
|
print $fh "begin transaction;\n";
|
|
printf $fh "insert into metadata values(%d, '%s', '%s', %f);\n",
|
|
$id, strftime("%Y-%m-%d %H:%M:%S", localtime(time())),
|
|
$opt_sql_project, $elapsed_sec;
|
|
}
|
|
|
|
my $nIns = 0;
|
|
foreach my $file (keys %{$rhh_count}) {
|
|
my $language = $rhh_count->{$file}{'lang'};
|
|
my $clean_filename = $file;
|
|
# If necessary (that is, if the input contained an
|
|
# archive file [.tar.gz, etc]), strip the temporary
|
|
# directory name which was used to expand the archive
|
|
# from the file name.
|
|
|
|
$clean_filename = rm_leading_tempdir($clean_filename, \%TEMP_DIR);
|
|
$clean_filename =~ s/\'/''/g; # double embedded single quotes
|
|
# to escape them
|
|
|
|
printf $fh "$insert_into_t values(%d, '%s', '%s', '%s', '%s', '%s', " .
|
|
"%d, %d, %d, %f);\n",
|
|
$id ,
|
|
$opt_sql_project ,
|
|
$language ,
|
|
$clean_filename ,
|
|
dirname( $clean_filename) ,
|
|
basename($clean_filename) ,
|
|
$rhh_count->{$file}{'blank'},
|
|
$rhh_count->{$file}{'comment'},
|
|
$rhh_count->{$file}{'code'} ,
|
|
$rhh_count->{$file}{'code'}*$rh_scale->{$language};
|
|
|
|
++$nIns;
|
|
if (!($nIns % 10_000) and ($opt_sql_style ne "oracle")) {
|
|
print $fh "commit;\n";
|
|
print $fh "begin transaction;\n";
|
|
}
|
|
}
|
|
if ($opt_sql_style ne "oracle") {
|
|
print $fh "commit;\n";
|
|
}
|
|
|
|
$fh->close unless $opt_sql eq "-"; # don't try to close STDOUT
|
|
print "<- generate_sql\n" if $opt_v > 2;
|
|
|
|
# sample query:
|
|
#
|
|
# select project, language,
|
|
# sum(nCode) as Code,
|
|
# sum(nComment) as Comments,
|
|
# sum(nBlank) as Blank,
|
|
# sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines,
|
|
# 100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct
|
|
# from t group by Project, Language order by Project, Code desc;
|
|
#
|
|
} # 1}}}
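
# Hedged usage sketch for the SQL written above (database and project
# names are illustrative): stream the statements into SQLite, then run
# the sample query, e.g.
#
#     cloc --sql 1 --sql-project myproj src/ | sqlite3 counts.db
#     sqlite3 counts.db "select Language, sum(nCode) from t group by Language;"
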
sub output_header { # {{{1
|
|
my ($header_line,
|
|
$hyphen_line,
|
|
$BY_FILE ,) = @_;
|
|
print "-> output_header\n" if $opt_v > 2;
|
|
my @R = ();
|
|
if ($opt_xml) {
|
|
if (!$ALREADY_SHOWED_XML_SECTION) {
|
|
push @R, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
|
|
push @R, '<?xml-stylesheet type="text/xsl" href="' .
|
|
$opt_xsl . '"?>' if $opt_xsl;
|
|
push @R, "<results>";
|
|
push @R, "<header>$header_line</header>";
|
|
$ALREADY_SHOWED_XML_SECTION = 1;
|
|
}
|
|
if ($BY_FILE) {
|
|
push @R, "<files>";
|
|
} else {
|
|
push @R, "<languages>";
|
|
}
|
|
} elsif ($opt_yaml) {
|
|
push @R, "---\n# $header_line";
|
|
} elsif ($opt_csv or $opt_md) {
|
|
# append the header to the end of the column headers
|
|
# to keep the output a bit cleaner from a spreadsheet
|
|
# perspective
|
|
} else {
|
|
if ($ALREADY_SHOWED_HEADER) {
|
|
push @R, "";
|
|
} else {
|
|
push @R, $header_line;
|
|
$ALREADY_SHOWED_HEADER = 1;
|
|
}
|
|
push @R, $hyphen_line;
|
|
}
|
|
print "<- output_header\n" if $opt_v > 2;
|
|
return @R;
|
|
} # 1}}}
|
|
sub generate_report { # {{{1
|
|
# returns an array of lines containing the results
|
|
my ($version , # in
|
|
$elapsed_sec, # in
|
|
$report_type, # in "by language" | "by report file" | "by file"
|
|
$rhh_count , # in count{TYPE}{nFiles|code|blank|comment|scaled}
|
|
# where TYPE = name of language, source file,
|
|
# or report file
|
|
$rh_scale , # in
|
|
) = @_;
|
|
|
|
my %orig_case = ();
|
|
if ($ON_WINDOWS and $report_type eq "by file") {
|
|
# restore the original upper/lowercase version of the file name
|
|
foreach my $lc_file (sort keys %{$rhh_count}) {
|
|
foreach my $cat (sort keys %{$rhh_count->{$lc_file}}) {
|
|
$orig_case{ $upper_lower_map{$lc_file} }{$cat} =
|
|
$rhh_count->{$lc_file}{$cat};
|
|
}
|
|
}
|
|
$rhh_count = \%orig_case;
|
|
}
|
|
print "-> generate_report\n" if $opt_v > 2;
|
|
my $DELIM = ",";
|
|
$DELIM = $opt_csv_delimiter if defined $opt_csv_delimiter;
|
|
$DELIM = "|" if defined $opt_md;
|
|
|
|
my @results = ();
|
|
|
|
my $languages = ();
|
|
|
|
my $sum_files = 0;
|
|
my $sum_code = 0;
|
|
my $sum_blank = 0;
|
|
my $sum_comment = 0;
|
|
my $max_len = 0;
|
|
foreach my $language (keys %{$rhh_count}) {
|
|
$sum_files += $rhh_count->{$language}{'nFiles'} ;
|
|
$sum_blank += $rhh_count->{$language}{'blank'} ;
|
|
$sum_comment += $rhh_count->{$language}{'comment'};
|
|
$sum_code += $rhh_count->{$language}{'code'} ;
|
|
$max_len = length($language) if length($language) > $max_len;
|
|
}
|
|
my $column_1_offset = 0;
|
|
$column_1_offset = $max_len - 17 if $max_len > 17;
|
|
my $sum_lines = $sum_blank + $sum_comment + $sum_code;
|
|
$elapsed_sec = 0.5 unless $elapsed_sec;
|
|
|
|
my $spacing_0 = 23;
|
|
my $spacing_1 = 13;
|
|
my $spacing_2 = 9;
|
|
my $spacing_3 = 17;
|
|
if (!$opt_3) {
|
|
$spacing_1 = 19;
|
|
$spacing_2 = 14;
|
|
$spacing_3 = 27;
|
|
}
|
|
$spacing_0 += $column_1_offset;
|
|
$spacing_1 += $column_1_offset;
|
|
$spacing_3 += $column_1_offset;
|
|
my %Format = (
|
|
'1' => { 'xml' => 'name="%s" ',
|
|
'txt' => "\%-${spacing_0}s ",
|
|
},
|
|
'2' => { 'xml' => 'name="%s" ',
|
|
'txt' => "\%-${spacing_3}s ",
|
|
},
|
|
'3' => { 'xml' => 'files_count="%d" ',
|
|
'txt' => '%6d ',
|
|
},
|
|
'4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ',
|
|
'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d",
|
|
},
|
|
'5' => { 'xml' => 'blank="%3.2f" comment="%3.2f" code="%d" ',
|
|
'txt' => "\%14.2f \%14.2f \%${spacing_2}d",
|
|
},
|
|
'6' => { 'xml' => 'factor="%.2f" scaled="%.2f" ',
|
|
'txt' => ' x %6.2f = %14.2f',
|
|
},
|
|
);
|
|
my $Style = "txt";
|
|
$Style = "xml" if $opt_xml ;
|
|
$Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt
|
|
$Style = "xml" if $opt_json; # not a typo; just set to anything but txt
|
|
$Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt
|
|
|
|
my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset);
|
|
$hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset)
|
|
if (!$opt_sum_reports) and (!$opt_3) and (68 + $column_1_offset) > 79;
|
|
my $data_line = "";
|
|
my $first_column;
|
|
my $BY_LANGUAGE = 0;
|
|
my $BY_FILE = 0;
|
|
if ($report_type eq "by language") {
|
|
$first_column = "Language";
|
|
$BY_LANGUAGE = 1;
|
|
} elsif ($report_type eq "by file") {
|
|
$first_column = "File";
|
|
$BY_FILE = 1;
|
|
} elsif ($report_type eq "by report file") {
|
|
$first_column = "File";
|
|
} else {
|
|
$first_column = "Report File";
|
|
}
|
|
|
|
my $header_line = sprintf "%s v %s", $URL, $version;
|
|
$header_line .= sprintf(" T=%.2f s (%.1f files/s, %.1f lines/s)",
|
|
$elapsed_sec ,
|
|
$sum_files/$elapsed_sec,
|
|
$sum_lines/$elapsed_sec) unless $opt_sum_reports or $opt_hide_rate;
|
|
if ($opt_xml or $opt_yaml or $opt_json) {
|
|
if (!$ALREADY_SHOWED_HEADER) {
|
|
if ($opt_by_file_by_lang and $opt_json) {
|
|
push @results, '{ "by_file" : ';
|
|
}
|
|
push @results, xml_yaml_or_json_header($URL, $version, $elapsed_sec,
|
|
$sum_files, $sum_lines, $BY_FILE);
|
|
# $ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
|
|
# --sum-reports yields two xml or yaml files, one by
|
|
# language and one by report file, each of which needs a header
|
|
}
|
|
if ($opt_xml) {
|
|
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
push @results, "<files>";
|
|
} else {
|
|
push @results, "<languages>";
|
|
}
|
|
}
|
|
} else {
|
|
$header_line =~ s/,// if $opt_csv;
|
|
push @results, output_header($header_line, $hyphen_line, $BY_FILE);
|
|
}
|
|
|
|
if ($Style eq "txt") {
|
|
# column headers
|
|
if (!$opt_3 and $BY_FILE) {
|
|
my $spacing_n = $spacing_1 - 11;
|
|
$data_line = sprintf "%-${spacing_n}s ", $first_column;
|
|
} else {
|
|
$data_line = sprintf "%-${spacing_1}s ", $first_column;
|
|
}
|
|
if ($BY_FILE) {
|
|
$data_line .= sprintf "%${spacing_2}s " , " " ;
|
|
} else {
|
|
$data_line .= sprintf "%${spacing_2}s " , "files";
|
|
}
|
|
my $PCT_symbol = "";
|
|
$PCT_symbol = " \%" if $opt_by_percent;
|
|
$data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s",
|
|
"blank${PCT_symbol}" ,
|
|
"comment${PCT_symbol}" ,
|
|
"code";
|
|
$data_line .= sprintf " %8s %14s",
|
|
"scale" ,
|
|
"3rd gen. equiv"
|
|
if $opt_3;
|
|
if ($opt_md) {
|
|
my @col_header = ();
|
|
if ($data_line =~ m{\s%}) {
|
|
$data_line =~ s{\s%}{_%}g;
|
|
foreach my $w ( split(' ', $data_line) ) {
|
|
$w =~ s{_%}{ %};
|
|
push @col_header, $w;
|
|
}
|
|
} else {
|
|
push @col_header, split(' ', $data_line);
|
|
}
|
|
my @col_hyphens = ( '-------:') x scalar(@col_header);
|
|
$col_hyphens[0] = ':-------'; # first column left justified
|
|
push @results, join("|", @col_header );
|
|
push @results, join("|", @col_hyphens);
|
|
} else {
|
|
push @results, $data_line;
|
|
push @results, $hyphen_line;
|
|
}
|
|
}
|
|
|
|
if ($opt_csv) {
|
|
my $header2;
|
|
if ($BY_FILE) {
|
|
$header2 = "language${DELIM}filename";
|
|
} else {
|
|
$header2 = "files${DELIM}language";
|
|
}
|
|
$header2 .= "${DELIM}blank${DELIM}comment${DELIM}code";
|
|
$header2 .= "${DELIM}scale${DELIM}3rd gen. equiv" if $opt_3;
|
|
$header2 .= ${DELIM} . '"' . $header_line . '"';
|
|
push @results, $header2;
|
|
}
|
|
|
|
my $sum_scaled = 0;
|
|
foreach my $lang_or_file (sort {
|
|
$rhh_count->{$b}{'code'} <=>
|
|
$rhh_count->{$a}{'code'}
|
|
or $a cmp $b
|
|
}
|
|
keys %{$rhh_count}) {
|
|
next if $lang_or_file eq "by report file";
|
|
my ($factor, $scaled);
|
|
if ($BY_LANGUAGE or $BY_FILE) {
|
|
$factor = 1;
|
|
if ($BY_LANGUAGE) {
|
|
if (defined $rh_scale->{$lang_or_file}) {
|
|
$factor = $rh_scale->{$lang_or_file};
|
|
} else {
|
|
warn "No scale factor for $lang_or_file; using 1.00";
|
|
}
|
|
} else { # by individual code file
|
|
if ($report_type ne "by report file") {
|
|
next unless defined $rhh_count->{$lang_or_file}{'lang'};
|
|
next unless defined $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
|
|
$factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}};
|
|
}
|
|
}
|
|
$scaled = $factor*$rhh_count->{$lang_or_file}{'code'};
|
|
} else {
|
|
if (!defined $rhh_count->{$lang_or_file}{'scaled'}) {
|
|
$opt_3 = 0;
|
|
                # If we're summing together files previously generated
                # with --no3, then $rhh_count->{$lang_or_file}{'scaled'}
                # will be undefined.  That should only happen when
                # summing together by file, however.
|
|
} elsif ($BY_LANGUAGE) {
|
|
warn "Missing scaled language info for $lang_or_file\n";
|
|
}
|
|
if ($opt_3) {
|
|
$scaled = $rhh_count->{$lang_or_file}{'scaled'};
|
|
$factor = $scaled/$rhh_count->{$lang_or_file}{'code'};
|
|
}
|
|
}
|
|
|
|
if ($BY_FILE) {
|
|
my $clean_filename = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
$clean_filename = xml_metachars($clean_filename) if $opt_xml;
|
|
$data_line = sprintf $Format{'1'}{$Style}, $clean_filename;
|
|
} else {
|
|
$data_line = sprintf $Format{'2'}{$Style}, $lang_or_file;
|
|
}
|
|
$data_line .= sprintf $Format{3}{$Style} ,
|
|
$rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE;
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$rhh_count->{$lang_or_file}{'code'} ,
|
|
$rhh_count->{$lang_or_file}{'comment'},
|
|
$rhh_count->{$lang_or_file}{'blank'} );
|
|
$data_line .= sprintf $Format{5}{$Style} ,
|
|
$rhh_count->{$lang_or_file}{'blank'} / $DEN * 100,
|
|
$rhh_count->{$lang_or_file}{'comment'} / $DEN * 100,
|
|
$rhh_count->{$lang_or_file}{'code'} ;
|
|
} else {
|
|
$data_line .= sprintf $Format{4}{$Style} ,
|
|
$rhh_count->{$lang_or_file}{'blank'} ,
|
|
$rhh_count->{$lang_or_file}{'comment'},
|
|
$rhh_count->{$lang_or_file}{'code'} ;
|
|
}
|
|
$data_line .= sprintf $Format{6}{$Style} ,
|
|
$factor ,
|
|
$scaled if $opt_3;
|
|
$sum_scaled += $scaled if $opt_3;
|
|
|
|
if ($opt_xml) {
|
|
if (defined $rhh_count->{$lang_or_file}{'lang'}) {
|
|
my $lang = $rhh_count->{$lang_or_file}{'lang'};
|
|
if (!defined $languages->{$lang}) {
|
|
$languages->{$lang} = $lang;
|
|
}
|
|
$data_line.=' language="' . $lang . '" ';
|
|
}
|
|
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
push @results, " <file " . $data_line . "/>";
|
|
} else {
|
|
push @results, " <language " . $data_line . "/>";
|
|
}
|
|
} elsif ($opt_yaml or $opt_json) {
|
|
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
|
|
if ($opt_yaml) {
|
|
# YAML: force quoted language or filename in case these
|
|
# have embedded funny characters, issue #312
|
|
push @results,"'" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "' :$open_B";
|
|
} else {
|
|
push @results,"${Q}" . rm_leading_tempdir($lang_or_file, \%TEMP_DIR). "${Q} :$open_B";
|
|
}
|
|
push @results," ${Q}nFiles${Q}: " . $rhh_count->{$lang_or_file}{'nFiles'} . $C
|
|
unless $BY_FILE;
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$rhh_count->{$lang_or_file}{'code'} ,
|
|
$rhh_count->{$lang_or_file}{'comment'},
|
|
$rhh_count->{$lang_or_file}{'blank'} );
|
|
push @results," ${Q}blank_pct${Q}: " .
|
|
sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . $C;
|
|
push @results," ${Q}comment_pct${Q}: " .
|
|
sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . $C;
|
|
push @results," ${Q}code${Q}: " . $rhh_count->{$lang_or_file}{'code'} . $C;
|
|
} else {
|
|
push @results," ${Q}blank${Q}: " . $rhh_count->{$lang_or_file}{'blank'} . $C;
|
|
push @results," ${Q}comment${Q}: " . $rhh_count->{$lang_or_file}{'comment'} . $C;
|
|
push @results," ${Q}code${Q}: " . $rhh_count->{$lang_or_file}{'code'} . $C;
|
|
}
|
|
push @results," ${Q}language${Q}: " . $Q . $rhh_count->{$lang_or_file}{'lang'} . $Q . $C
|
|
if $BY_FILE;
|
|
if ($opt_3) {
|
|
push @results, " ${Q}scaled${Q}: " . $scaled . $C;
|
|
push @results, " ${Q}factor${Q}: " . $factor . $C;
|
|
}
|
|
if ($opt_json) { # replace the trailing comma with }, on the last line
|
|
$results[-1] =~ s/,\s*$/},/;
|
|
}
|
|
} elsif ($opt_csv or $opt_md) {
|
|
my $extra_3 = "";
|
|
$extra_3 = "${DELIM}$factor${DELIM}$scaled" if $opt_3;
|
|
my $first_column = undef;
|
|
my $clean_name = $lang_or_file;
|
|
my $str;
|
|
if ($opt_csv) {
|
|
if ($BY_FILE) {
|
|
$first_column = $rhh_count->{$lang_or_file}{'lang'};
|
|
$clean_name = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
} else {
|
|
$first_column = $rhh_count->{$lang_or_file}{'nFiles'};
|
|
}
|
|
$str = $first_column . ${DELIM} .
|
|
$clean_name . ${DELIM};
|
|
} else {
|
|
if ($BY_FILE) {
|
|
$first_column = $rhh_count->{$lang_or_file}{'lang'};
|
|
$clean_name = rm_leading_tempdir($lang_or_file, \%TEMP_DIR);
|
|
$str = $clean_name . ${DELIM};
|
|
} else {
|
|
$first_column = $rhh_count->{$lang_or_file}{'nFiles'};
|
|
$str = $clean_name . ${DELIM} .
|
|
$first_column . ${DELIM};
|
|
}
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$rhh_count->{$lang_or_file}{'code'} ,
|
|
$rhh_count->{$lang_or_file}{'comment'},
|
|
$rhh_count->{$lang_or_file}{'blank'} );
|
|
$str .= sprintf("%3.2f", $rhh_count->{$lang_or_file}{'blank'} / $DEN * 100) . ${DELIM} .
|
|
sprintf("%3.2f", $rhh_count->{$lang_or_file}{'comment'} / $DEN * 100) . ${DELIM} .
|
|
$rhh_count->{$lang_or_file}{'code'};
|
|
} else {
|
|
$str .= $rhh_count->{$lang_or_file}{'blank'} . ${DELIM} .
|
|
$rhh_count->{$lang_or_file}{'comment'}. ${DELIM} .
|
|
$rhh_count->{$lang_or_file}{'code'};
|
|
}
|
|
$str .= $extra_3;
|
|
push @results, $str;
|
|
|
|
} else {
|
|
push @results, $data_line;
|
|
}
|
|
}
|
|
|
|
my $avg_scale = 1; # weighted average of scale factors
|
|
$avg_scale = sprintf("%.2f", $sum_scaled / $sum_code)
|
|
if $sum_code and $opt_3;
|
|
|
|
if ($opt_xml) {
|
|
$data_line = "";
|
|
if (!$BY_FILE) {
|
|
$data_line .= sprintf "sum_files=\"%d\" ", $sum_files;
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$sum_code, $sum_comment, $sum_blank);
|
|
$data_line .= sprintf $Format{'5'}{$Style},
|
|
$sum_blank / $DEN * 100,
|
|
$sum_comment / $DEN * 100,
|
|
$sum_code ;
|
|
} else {
|
|
$data_line .= sprintf $Format{'4'}{$Style},
|
|
$sum_blank ,
|
|
$sum_comment ,
|
|
$sum_code ;
|
|
}
|
|
$data_line .= sprintf $Format{'6'}{$Style},
|
|
$avg_scale ,
|
|
$sum_scaled if $opt_3;
|
|
push @results, " <total " . $data_line . "/>";
|
|
|
|
if ($BY_FILE or ($report_type eq "by report file")) {
|
|
push @results, "</files>";
|
|
} else {
|
|
foreach my $language (keys %{$languages}) {
|
|
push @results, ' <language name="' . $language . '"/>';
|
|
}
|
|
push @results, "</languages>";
|
|
}
|
|
|
|
if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) {
|
|
push @results, "</results>";
|
|
} else {
|
|
$ALREADY_SHOWED_XML_SECTION = 1;
|
|
}
|
|
} elsif ($opt_yaml or $opt_json) {
|
|
my ($Q, $open_B, $close_B, $start, $C) = yaml_to_json_separators();
|
|
push @results, "${Q}SUM${Q}: ${open_B}";
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$sum_code, $sum_comment, $sum_blank);
|
|
push @results, " ${Q}blank${Q}: " . sprintf("%.2f", $sum_blank / $DEN * 100) . $C;
|
|
push @results, " ${Q}comment${Q}: ". sprintf("%.2f", $sum_comment / $DEN * 100) . $C;
|
|
push @results, " ${Q}code${Q}: " . $sum_code . $C;
|
|
} else {
|
|
push @results, " ${Q}blank${Q}: " . $sum_blank . $C;
|
|
push @results, " ${Q}comment${Q}: ". $sum_comment . $C;
|
|
push @results, " ${Q}code${Q}: " . $sum_code . $C;
|
|
}
|
|
push @results, " ${Q}nFiles${Q}: " . $sum_files . $C;
|
|
if ($opt_3) {
|
|
push @results, " ${Q}scaled${Q}: " . $sum_scaled . $C;
|
|
push @results, " ${Q}factor${Q}: " . $avg_scale . $C;
|
|
}
|
|
if ($opt_json) {
|
|
$results[-1] =~ s/,\s*$/} }/;
|
|
if ($opt_by_file_by_lang) {
|
|
if ($ALREADY_SHOWED_HEADER) {
|
|
$results[-1] .= ' }';
|
|
} else {
|
|
$results[-1] .= ', "by_lang" : {';
|
|
}
|
|
}
|
|
}
|
|
} elsif ($opt_csv) {
|
|
my @entries = ();
|
|
if ($opt_by_file) {
|
|
push @entries, "SUM";
|
|
push @entries, "";
|
|
} else {
|
|
push @entries, $sum_files;
|
|
push @entries, "SUM";
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$sum_code, $sum_comment, $sum_blank);
|
|
push @entries, sprintf("%.2f", $sum_blank / $DEN * 100);
|
|
push @entries, sprintf("%.2f", $sum_comment / $DEN * 100);
|
|
} else {
|
|
push @entries, $sum_blank;
|
|
push @entries, $sum_comment;
|
|
}
|
|
push @entries, $sum_code;
|
|
if ($opt_3) {
|
|
push @entries, $sum_scaled;
|
|
push @entries, $avg_scale ;
|
|
}
|
|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
|
|
push @results, join($separator, @entries);
|
|
} else {
|
|
|
|
if ($BY_FILE) {
|
|
$data_line = sprintf "%-${spacing_0}s ", "SUM:" ;
|
|
} else {
|
|
$data_line = sprintf "%-${spacing_1}s ", "SUM:" ;
|
|
$data_line .= sprintf "%${spacing_2}d ", $sum_files;
|
|
}
|
|
if ($opt_by_percent) {
|
|
my $DEN = compute_denominator($opt_by_percent ,
|
|
$sum_code, $sum_comment, $sum_blank);
|
|
$data_line .= sprintf $Format{'5'}{$Style},
|
|
$sum_blank / $DEN * 100,
|
|
$sum_comment / $DEN * 100,
|
|
$sum_code ;
|
|
} else {
|
|
$data_line .= sprintf $Format{'4'}{$Style},
|
|
$sum_blank ,
|
|
$sum_comment ,
|
|
$sum_code ;
|
|
}
|
|
$data_line .= sprintf $Format{'6'}{$Style},
|
|
$avg_scale ,
|
|
$sum_scaled if $opt_3;
|
|
if ($opt_md) {
|
|
my @words = split(' ', $data_line);
|
|
my $n_cols = scalar(@words);
|
|
# my $n_cols = scalar(split(' ', $data_line)); # deprecated
|
|
$data_line =~ s/\s+/\|/g;
|
|
my @col_hyphens = ( '--------') x $n_cols;
|
|
push @results, join("|", @col_hyphens);
|
|
push @results, $data_line if $sum_files > 1 or $opt_sum_one;
|
|
unshift @results, ( "cloc|$header_line", "--- | ---", "", );
|
|
} else {
|
|
push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one;
|
|
push @results, $data_line if $sum_files > 1 or $opt_sum_one;
|
|
push @results, $hyphen_line;
|
|
}
|
|
}
|
|
write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
$ALREADY_SHOWED_HEADER = 1 unless $opt_sum_reports;
|
|
print "<- generate_report\n" if $opt_v > 2;
|
|
return @results;
|
|
} # 1}}}
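
# Hedged sketch of the default plain-text table assembled above, by
# language and without --3 (all counts invented; the rates follow from
# 3 files and 171 lines over T=0.02 s):
#
#     github.com/AlDanial/cloc v 2.01  T=0.02 s (150.0 files/s, 8550.0 lines/s)
#     -------------------------------------------------------------------------------
#     Language                     files          blank        comment           code
#     -------------------------------------------------------------------------------
#     Perl                             2              5             10            100
#     C++                              1              2              4             50
#     -------------------------------------------------------------------------------
#     SUM:                             3              7             14            150
#     -------------------------------------------------------------------------------
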
sub print_errors { # {{{1
|
|
my ($rh_Error_Codes, # in
|
|
$raa_errors , # in
|
|
) = @_;
|
|
|
|
print "-> print_errors\n" if $opt_v > 2;
|
|
my %error_string = reverse(%{$rh_Error_Codes});
|
|
my $nErrors = scalar @{$raa_errors};
|
|
    warn sprintf "\n%d error%s:\n", plural_form($nErrors);
|
|
for (my $i = 0; $i < $nErrors; $i++) {
|
|
warn sprintf "%s: %s\n",
|
|
$error_string{ $raa_errors->[$i][0] },
|
|
$raa_errors->[$i][1] ;
|
|
}
|
|
print "<- print_errors\n" if $opt_v > 2;
|
|
|
|
} # 1}}}
|
|
sub write_lang_def { # {{{1
|
|
my ($file ,
|
|
$rh_Language_by_Extension , # in
|
|
$rh_Language_by_Script , # in
|
|
$rh_Language_by_File , # in
|
|
$rhaa_Filters_by_Language , # in
|
|
$rh_Not_Code_Extension , # in
|
|
$rh_Not_Code_Filename , # in
|
|
$rh_Scale_Factor , # in
|
|
$rh_EOL_Continuation_re , # in
|
|
) = @_;
|
|
|
|
print "-> write_lang_def($file)\n" if $opt_v > 2;
|
|
my @outlines = ();
|
|
|
|
foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
|
|
next if $language =~ /(Brain|\(unknown\))/;
|
|
next if defined $Extension_Collision{$language};
|
|
push @outlines, $language;
|
|
foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) {
|
|
my $line = "";
|
|
$line .= sprintf " filter %s", $filter->[0];
|
|
$line .= sprintf " %s", $filter->[1] if defined $filter->[1];
|
|
# $filter->[0] == 'remove_between_general',
|
|
# 'remove_between_regex', and
|
|
# 'remove_matches_2re' have two args
|
|
$line .= sprintf " %s", $filter->[2] if defined $filter->[2];
|
|
# $filter->[0] == 'replace_between_regex' has three or four args
|
|
$line .= sprintf " %s", $filter->[3] if defined $filter->[3];
|
|
$line .= sprintf " %s", $filter->[4] if defined $filter->[4];
|
|
push @outlines, $line;
|
|
}
|
|
|
|
# file extension won't appear if the extension maps to
|
|
# multiple languages; work around this
|
|
my $found = 0;
|
|
foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
|
|
if ($language eq $rh_Language_by_Extension->{$ext}) {
|
|
push @outlines, sprintf " extension %s\n", $ext;
|
|
$found = 1;
|
|
}
|
|
}
|
|
if (!$found and $opt_write_lang_def_incl_dup) {
|
|
foreach my $multilang (sort keys %Extension_Collision) {
|
|
my %Languages = map { $_ => 1 } split('/', $multilang);
|
|
next unless $Languages{$language};
|
|
foreach my $ext (@{$Extension_Collision{$multilang}}) {
|
|
push @outlines, sprintf " extension %s\n", $ext;
|
|
}
|
|
}
|
|
}
|
|
|
|
foreach my $filename (sort keys %{$rh_Language_by_File}) {
|
|
if ($language eq $rh_Language_by_File->{$filename}) {
|
|
push @outlines, sprintf " filename %s\n", $filename;
|
|
}
|
|
}
|
|
foreach my $script_exe (sort keys %{$rh_Language_by_Script}) {
|
|
if ($language eq $rh_Language_by_Script->{$script_exe}) {
|
|
push @outlines, sprintf " script_exe %s\n", $script_exe;
|
|
}
|
|
}
|
|
push @outlines, sprintf " 3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language};
|
|
if (defined $rh_EOL_Continuation_re->{$language}) {
|
|
push @outlines, sprintf " end_of_line_continuation %s\n",
|
|
$rh_EOL_Continuation_re->{$language};
|
|
}
|
|
}
|
|
|
|
write_file($file, {}, @outlines);
|
|
print "<- write_lang_def\n" if $opt_v > 2;
|
|
} # 1}}}
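
# Hedged sample of one entry in the definitions file written above (and
# read back by read_lang_def() below); the specific filter and scale
# values shown are illustrative.  The four-space indent is required by
# the parser:
#
#     C++
#         filter remove_matches ^\s*//
#         filter call_regexp_common C++
#         extension cc
#         extension cpp
#         3rd_gen_scale 1.51
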
sub read_lang_def { # {{{1
|
|
my ($file ,
|
|
$rh_Language_by_Extension , # out
|
|
$rh_Language_by_Script , # out
|
|
$rh_Language_by_File , # out
|
|
$rhaa_Filters_by_Language , # out
|
|
$rh_Not_Code_Extension , # out
|
|
$rh_Not_Code_Filename , # out
|
|
$rh_Scale_Factor , # out
|
|
$rh_EOL_Continuation_re , # out
|
|
$rh_EOL_abc,
|
|
) = @_;
|
|
|
|
|
|
print "-> read_lang_def($file)\n" if $opt_v > 2;
|
|
|
|
my $language = "";
|
|
my @lines = read_file($file);
|
|
foreach (@lines) {
|
|
next if /^\s*#/ or /^\s*$/;
|
|
|
|
$_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
|
|
|
|
if (/^(\w+.*?)\s*$/) {
|
|
$language = $1;
|
|
next;
|
|
}
|
|
die "Missing computer language name, line $. of $file\n"
|
|
unless $language;
|
|
|
|
if (/^\s{4}filter\s+(remove_between_(general|2re|regex))
|
|
\s+(\S+)\s+(\S+)\s*$/x) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $3 , $4 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_between_regex)
|
|
\s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 , $4 , $5]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_between_regex)
|
|
\s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 , $4 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_regex)\s+(\S+)\s*$/) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , '' ]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_regex)
|
|
\s+(\S+)\s+(.+?)\s*$/x) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(\w+)\s*$/) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
|
|
|
|
} elsif (/^\s{4}extension\s+(\S+)\s*$/) {
|
|
if (defined $rh_Language_by_Extension->{$1}) {
|
|
die "File extension collision: $1 ",
|
|
"maps to languages '$rh_Language_by_Extension->{$1}' ",
|
|
"and '$language'\n" ,
|
|
"Edit $file and remove $1 from one of these two ",
|
|
"language definitions.\n";
|
|
}
|
|
$rh_Language_by_Extension->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}filename\s+(\S+)\s*$/) {
|
|
$rh_Language_by_File->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
|
|
$rh_Language_by_Script->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
|
|
$rh_Scale_Factor->{$language} = $1;
|
|
|
|
} elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
|
|
$rh_EOL_Continuation_re->{$language} = $1;
|
|
|
|
} else {
|
|
die "Unexpected data line $. of $file:\n$_\n";
|
|
}
|
|
|
|
}
|
|
print "<- read_lang_def\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub merge_lang_def { # {{{1
|
|
my ($file ,
|
|
$rh_Language_by_Extension , # in/out
|
|
$rh_Language_by_Script , # in/out
|
|
$rh_Language_by_File , # in/out
|
|
$rhaa_Filters_by_Language , # in/out
|
|
$rh_Not_Code_Extension , # in/out
|
|
$rh_Not_Code_Filename , # in/out
|
|
$rh_Scale_Factor , # in/out
|
|
$rh_EOL_Continuation_re , # in/out
|
|
$rh_EOL_abc,
|
|
) = @_;
print "-> merge_lang_def($file)\n" if $opt_v > 2;
|
|
|
|
my $language = "";
|
|
my $already_know_it = undef;
|
|
my @lines = read_file($file);
|
|
foreach (@lines) {
|
|
next if /^\s*#/ or /^\s*$/;
|
|
|
|
$_ = lc $_ if $ON_WINDOWS and /^\s+(filename|extension)\s/;
|
|
|
|
if (/^(\w+.*?)\s*$/) {
|
|
$language = $1;
|
|
$already_know_it = defined $rh_Scale_Factor->{$language};
|
|
next;
|
|
}
|
|
die "Missing computer language name, line $. of $file\n"
|
|
unless $language;
|
|
|
|
if (/^\s{4}filter\s+(remove_between_(general|2re|regex))
|
|
\s+(\S+)\s+(\S+)\s*$/x) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $3 , $4 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_between_regex)
|
|
\s+(\S+)\s+(\S+)\s+(\S+|\".*\")\s+(\S+)\s*$/x) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 , $4 , $5]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_between_regex)
|
|
\s+(\S+)\s+(\S+)\s+(.*?)\s*$/x) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 , $4 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(replace_regex)
|
|
\s+(\S+)\s+(.+?)\s*$/x) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [
|
|
$1 , $2 , $3 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(\w+)\s*$/) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ]
|
|
|
|
} elsif (/^\s{4}filter\s+(\w+)\s+(.*?)\s*$/) {
|
|
next if $already_know_it;
|
|
push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ]
|
|
|
|
} elsif (/^\s{4}extension\s+(\S+)\s*$/) {
|
|
next if $already_know_it;
|
|
if (defined $rh_Language_by_Extension->{$1}) {
|
|
die "File extension collision: $1 ",
|
|
"maps to languages '$rh_Language_by_Extension->{$1}' ",
|
|
"and '$language'\n" ,
|
|
"Edit $file and remove $1 from one of these two ",
|
|
"language definitions.\n";
|
|
}
|
|
$rh_Language_by_Extension->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}filename\s+(\S+)\s*$/) {
|
|
next if $already_know_it;
|
|
$rh_Language_by_File->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}script_exe\s+(\S+)\s*$/) {
|
|
next if $already_know_it;
|
|
$rh_Language_by_Script->{$1} = $language;
|
|
|
|
} elsif (/^\s{4}3rd_gen_scale\s+(\S+)\s*$/) {
|
|
next if $already_know_it;
|
|
$rh_Scale_Factor->{$language} = $1;
|
|
|
|
} elsif (/^\s{4}end_of_line_continuation\s+(\S+)\s*$/) {
|
|
next if $already_know_it;
|
|
$rh_EOL_Continuation_re->{$language} = $1;
|
|
|
|
} else {
|
|
die "Unexpected data line $. of $file:\n$_\n";
|
|
}
|
|
|
|
}
|
|
print "<- merge_lang_def\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub print_extension_info { # {{{1
|
|
my ($extension,) = @_;
|
|
if ($extension) { # show information on this extension
|
|
foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
|
|
# Language_by_Extension{f} = 'Fortran 77'
|
|
next if $Language_by_Extension{$ext} =~ /Brain/;
|
|
printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext}
|
|
if $ext =~ m{$extension}i;
|
|
}
|
|
} else { # show information on all extensions
|
|
foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
|
|
next if $Language_by_Extension{$ext} =~ /Brain/;
|
|
# Language_by_Extension{f} = 'Fortran 77'
|
|
printf "%-15s -> %s\n", $ext, $Language_by_Extension{$ext};
|
|
}
|
|
}
|
|
} # 1}}}
|
|
sub print_language_info { # {{{1
|
|
my ($language,
|
|
$prefix ,) = @_;
|
|
my %extensions = (); # the subset matched by the given $language value
|
|
if ($language) { # show information on this language
|
|
foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
|
|
# Language_by_Extension{f} = 'Fortran 77'
|
|
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
if lc $Language_by_Extension{$ext} eq lc $language;
|
|
# if $Language_by_Extension{$ext} =~ m{$language}i;
|
|
}
|
|
} else { # show information on all languages
|
|
foreach my $ext (sort {lc $a cmp lc $b or $a cmp $b } keys %Language_by_Extension) {
|
|
# Language_by_Extension{f} = 'Fortran 77'
|
|
push @{$extensions{$Language_by_Extension{$ext}} }, $ext
|
|
}
|
|
}
|
|
|
|
# add exceptions (one file extension mapping to multiple languages)
|
|
if (!$language or $language =~ /^(Objective-C|MATLAB|Mathematica|MUMPS|Mercury)$/i) {
|
|
push @{$extensions{'Objective-C'}}, "m";
|
|
push @{$extensions{'MATLAB'}} , "m";
|
|
push @{$extensions{'Mathematica'}}, "m";
|
|
push @{$extensions{'MUMPS'}} , "m";
|
|
delete $extensions{'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'};
|
|
}
|
|
if (!$language or $language =~ /^(Lisp|OpenCL)$/i) {
|
|
push @{$extensions{'Lisp'}} , "cl";
|
|
push @{$extensions{'OpenCL'}}, "cl";
|
|
delete $extensions{'Lisp/OpenCL'};
|
|
}
|
|
if (!$language or $language =~ /^(Lisp|Julia)$/i) {
|
|
push @{$extensions{'Lisp'}} , "jl";
|
|
push @{$extensions{'Julia'}} , "jl";
|
|
delete $extensions{'Lisp/Julia'};
|
|
}
|
|
if (!$language or $language =~ /^(Perl|Prolog)$/i) {
|
|
push @{$extensions{'Perl'}} , "pl";
|
|
push @{$extensions{'Prolog'}}, "pl";
|
|
delete $extensions{'Perl/Prolog'};
|
|
}
|
|
if (!$language or $language =~ /^(Raku|Prolog)$/i) {
|
|
push @{$extensions{'Perl'}} , "p6";
|
|
push @{$extensions{'Prolog'}}, "p6";
|
|
delete $extensions{'Perl/Prolog'};
|
|
}
|
|
if (!$language or $language =~ /^(IDL|Qt Project|Prolog|ProGuard)$/i) {
|
|
push @{$extensions{'IDL'}} , "pro";
|
|
push @{$extensions{'Qt Project'}}, "pro";
|
|
push @{$extensions{'Prolog'}} , "pro";
|
|
push @{$extensions{'ProGuard'}} , "pro";
|
|
delete $extensions{'IDL/Qt Project/Prolog/ProGuard'};
|
|
}
|
|
if (!$language or $language =~ /^(D|dtrace)$/i) {
|
|
push @{$extensions{'D'}} , "d";
|
|
push @{$extensions{'dtrace'}} , "d";
|
|
delete $extensions{'D/dtrace'};
|
|
}
|
|
if (!$language or $language =~ /^Forth$/) {
|
|
push @{$extensions{'Forth'}} , "fs";
|
|
push @{$extensions{'Forth'}} , "f";
|
|
push @{$extensions{'Forth'}} , "for";
|
|
delete $extensions{'Fortran 77/Forth'};
|
|
delete $extensions{'F#/Forth'};
|
|
}
|
|
if (!$language or $language =~ /^Fortran 77$/) {
|
|
push @{$extensions{'Fortran 77'}}, "f";
|
|
push @{$extensions{'Fortran 77'}}, "for";
|
|
push @{$extensions{'F#'}} , "fs";
|
|
delete $extensions{'Fortran 77/Forth'};
|
|
}
|
|
if (!$language or $language =~ /^F#$/) {
|
|
push @{$extensions{'F#'}} , "fs";
|
|
delete $extensions{'F#/Forth'};
|
|
}
|
|
if (!$language or $language =~ /^(Verilog-SystemVerilog|Coq)$/) {
|
|
push @{$extensions{'Coq'}} , "v";
|
|
push @{$extensions{'Verilog-SystemVerilog'}} , "v";
|
|
delete $extensions{'Verilog-SystemVerilog/Coq'};
|
|
}
|
|
if (!$language or $language =~ /^(TypeScript|Qt Linguist)$/) {
|
|
push @{$extensions{'TypeScript'}} , "ts";
|
|
push @{$extensions{'Qt Linguist'}} , "ts";
|
|
delete $extensions{'TypeScript/Qt Linguist'};
|
|
}
|
|
if (!$language or $language =~ /^(Qt|Glade)$/) {
|
|
push @{$extensions{'Glade'}} , "ui";
|
|
push @{$extensions{'Qt'}} , "ui";
|
|
delete $extensions{'Qt/Glade'};
|
|
}
|
|
if (!$language or $language =~ /^(C#|Smalltalk)$/) {
|
|
push @{$extensions{'C#'}} , "cs";
|
|
push @{$extensions{'Smalltalk'}} , "cs";
|
|
delete $extensions{'C#/Smalltalk'};
|
|
}
|
|
if (!$language or $language =~ /^(Visual\s+Basic|TeX|Apex\s+Class)$/i) {
|
|
push @{$extensions{'Visual Basic'}} , "cls";
|
|
push @{$extensions{'TeX'}} , "cls";
|
|
push @{$extensions{'Apex Class'}} , "cls";
|
|
delete $extensions{'Visual Basic/TeX/Apex Class'};
|
|
}
|
|
if (!$language or $language =~ /^(Ant)$/i) {
|
|
push @{$extensions{'Ant'}} , "build.xml";
|
|
delete $extensions{'Ant/XML'};
|
|
}
|
|
if (!$language or $language =~ /^(Scheme|SaltStack)$/i) {
|
|
push @{$extensions{'Scheme'}} , "sls";
|
|
push @{$extensions{'SaltStack'}} , "sls";
|
|
delete $extensions{'Scheme/SaltStack'};
|
|
}
|
|
if ($opt_explain) {
|
|
return unless $extensions{$language};
|
|
if ($prefix) {
|
|
printf "%s %s\n", $prefix, join(", ", @{$extensions{$language}});
|
|
} else {
|
|
printf "%-26s (%s)\n", $language, join(", ", @{$extensions{$language}});
|
|
}
|
|
} else {
|
|
if (%extensions) {
|
|
foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) {
|
|
next if $lang =~ /Brain/;
|
|
if ($prefix) {
|
|
printf "%s %s\n", $prefix, join(", ", @{$extensions{$lang}});
|
|
} else {
|
|
printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} # 1}}}
|
|
sub print_language_filters { # {{{1
|
|
my ($language,) = @_;
|
|
if (!$Filters_by_Language{$language} or
|
|
!@{$Filters_by_Language{$language}}) {
|
|
warn "Unknown language: $language\n";
|
|
warn "Use --show-lang to list all defined languages.\n";
|
|
return;
|
|
}
|
|
printf "%s\n", $language;
|
|
foreach my $filter (@{$Filters_by_Language{$language}}) {
|
|
printf " filter %s", $filter->[0];
|
|
printf " %s", $filter->[1] if defined $filter->[1];
|
|
printf " %s", $filter->[2] if defined $filter->[2];
|
|
printf " %s", $filter->[3] if defined $filter->[3];
|
|
printf " %s", $filter->[4] if defined $filter->[4];
|
|
print "\n";
|
|
}
|
|
print_language_info($language, " extensions:");
|
|
} # 1}}}
|
|
sub top_level_SMB_dir { # {{{1
|
|
# Ref https://github.com/AlDanial/cloc/issues/392, if the
|
|
# user supplies a directory name which is an SMB mount
|
|
# point, this directory will appear to File::Find as
|
|
# though it is empty unless $File::Find::dont_use_nlink
|
|
# is set to 1. This subroutine checks to see if any SMB
|
|
# mounts (identified from stat()'s fourth entry, nlink,
|
|
# having a value of 2) were passed in on the command line.
my ($ra_arg_list,) = @_; # in user supplied file name, directory name, git hash, etc
|
|
foreach my $entry (@{$ra_arg_list}) {
|
|
next unless is_dir($entry);
|
|
# gets here if $entry is a directory; now get its nlink value
|
|
my $nlink;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
my $stats = statL($entry);
|
|
$nlink = $stats->{nlink} if defined $stats;
|
|
} else {
|
|
my @stats = stat($entry);
|
|
$nlink = $stats[3];
|
|
}
|
|
return 1 if $nlink == 2; # meaning it is an SMB mount
|
|
}
|
|
return 0;
|
|
}
|
|
# 1}}}
|
|
sub get_git_metadata { # {{{1
|
|
my ($ra_arg_list, # in file name, directory name and/or
|
|
# git commit hash to examine
|
|
$rh_git_metadata) = @_; # out repo info
|
|
# Capture git information where possible--origin, branch, commit hash.
|
|
my $prt_args = join(",", @{$ra_arg_list});
|
|
print "-> get_git_metadata($prt_args)\n" if $opt_v > 2;
|
|
foreach my $arg (@{$ra_arg_list}) {
|
|
next if is_file($arg);
|
|
my $origin = `git remote get-url origin 2>&1`;
|
|
next if $origin =~ /^fatal:/;
|
|
chomp($rh_git_metadata->{$arg}{"origin"} = $origin);
|
|
chomp($rh_git_metadata->{$arg}{"branch"} = `git symbolic-ref --short HEAD`);
|
|
if (is_dir($arg)) {
|
|
chomp($rh_git_metadata->{$arg}{"commit"} = `git rev-parse HEAD`);
|
|
} else {
|
|
chomp($rh_git_metadata->{$arg}{"commit"} = `git rev-parse $arg`);
|
|
}
|
|
}
|
|
print "<- get_git_metadata()\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub replace_git_hash_with_tarfile { # {{{1
|
|
my ($ra_arg_list, # in file name, directory name and/or
|
|
# git commit hash to examine
|
|
$ra_git_similarity) = @_; # out only if --opt-git-diff-simindex
|
|
# replace git hashes in $ra_arg_list with tar files
|
|
# Diff mode and count mode behave differently:
|
|
# Diff:
|
|
# file git_hash
|
|
# Extract file from the git repo and only compare to it.
|
|
# git_hash1 git_hash2
|
|
# Get listings of all files in git_hash1 and git_hash2.
|
|
# git ls-tree --name-only -r *git_hash1*
|
|
# git ls-tree --name-only -r *git_hash2*
|
|
# Next, get listings of files that changed with git_hash1
|
|
# and git_hash2.
|
|
# git diff-tree -r --no-commit-id --name-only *git_hash1* *git_hash2*
|
|
# Finally, make two tar files of git repos1 and 2 where the file
|
|
# listing is the union of changes.
|
|
# git archive -o tarfile1 *git_hash1* \
|
|
# <union of files that changed and exist in this commit>
|
|
# git archive -o tarfile2 *git_hash2* \
|
|
# <union of files that changed and exist in this commit>
|
|
    #         To avoid an "Argument list too long" error, repeat the
    #         git archive step in chunks of files (the chunk size is
    #         set by $FILES_PER_ARCHIVE in git_archive()), then merge
    #         the tar files as the final step.
|
|
# Regular count:
|
|
# Simply make a tar file of all files in the git repo.
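    #  Illustrative command sequence for a git_hash1-to-git_hash2 diff
    #  (the abbreviated hashes below are hypothetical, for example only):
    #      git ls-tree --name-only -r 1a2b3c4
    #      git ls-tree --name-only -r 5d6e7f8
    #      git diff-tree -r --no-commit-id --name-only 1a2b3c4 5d6e7f8
    #      git archive -o left.tar  1a2b3c4 <changed files in 1a2b3c4>
    #      git archive -o right.tar 5d6e7f8 <changed files in 5d6e7f8>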
|
|
|
|
my $prt_args = join(",", @{$ra_arg_list});
|
|
print "-> replace_git_hash_with_tarfile($prt_args)\n" if $opt_v > 2;
|
|
#print "ra_arg_list 1: @{$ra_arg_list}\n";
|
|
|
|
my $hash_regex = qr/^([a-f\d]{5,40}|master|HEAD)(~\d+)?$/;
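    # The regex accepts an abbreviated or full hex object name (5 to 40
    # hex digits), "master", or "HEAD", each optionally followed by an
    # ancestor suffix such as ~1 or ~12.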
|
|
my %replacement_arg_list = ();
# early exit if none of the inputs look like git hashes
|
|
my %git_hash = ();
|
|
my $i = 0;
|
|
foreach my $file_or_dir (@{$ra_arg_list}) {
|
|
++$i;
|
|
if (can_read($file_or_dir)) { # readable file or dir; not a git hash
|
|
$replacement_arg_list{$i} = $file_or_dir;
|
|
next;
|
|
} elsif ($opt_force_git or $file_or_dir =~ m/$hash_regex/) {
|
|
$git_hash{$file_or_dir} = $i;
|
|
} # else the input can't be understood; ignore for now
|
|
}
|
|
return unless %git_hash;
# my $have_tar_git = external_utility_exists($ON_WINDOWS ? "unzip" : "tar --version") &&
|
|
my $have_tar_git = external_utility_exists("tar --version") &&
|
|
external_utility_exists("git --version");
|
|
if (!$have_tar_git) {
|
|
        warn "One or more inputs look like a git hash but " .
             "either git or tar is unavailable.\n";
|
|
return;
|
|
}
my %repo_listing = (); # $repo_listing{hash}{files} = 1;
|
|
foreach my $hash (sort keys %git_hash) {
|
|
my $git_list_cmd = "git ls-tree --name-only -r ";
|
|
if ($hash =~ m/(.*?):(.*?)$/) {
|
|
# requesting specific file(s) from this hash; grep for them
|
|
            # Note: this capability is not fully implemented yet
|
|
$git_list_cmd .= "$1|grep '$2'";
|
|
} else {
|
|
$git_list_cmd .= $hash;
|
|
}
|
|
print "$git_list_cmd\n" if $opt_v;
|
|
foreach my $file (`$git_list_cmd`) {
|
|
$file =~ s/\s+$//;
|
|
$repo_listing{$hash}{$file} = 1;
|
|
}
|
|
}
# logic for each of the modes
|
|
if ($opt_diff) {
|
|
#print "A DIFF\n";
|
|
# set the default git diff algorithm
|
|
$opt_git_diff_rel = 1 unless $opt_git_diff_all or
|
|
$opt_git_diff_simindex;
|
|
# is it git to git, or git to file/dir ?
|
|
my ($Left, $Right) = @{$ra_arg_list};
|
|
|
|
#use Data::Dumper;
|
|
#print "diff_listing= "; print Dumper(\%diff_listing);
|
|
#print "git_hash= "; print Dumper(\%git_hash);
|
|
if ($git_hash{$Left} and $git_hash{$Right}) {
|
|
#print "A DIFF git-to-git\n";
|
|
# git to git
|
|
# first make a union of all files that have changed in both commits
|
|
my %files_union = ();
|
|
|
|
my @left_files = ();
|
|
my @right_files = ();
|
|
if ($opt_git_diff_rel) {
|
|
                # Strategy 1: Union files are those git considers changed
|
|
# between the two commits.
|
|
my $git_list_cmd = "git diff-tree -r --no-commit-id --name-only $Left $Right";
|
|
# print "$git_list_cmd\n" if $opt_v;
|
|
foreach my $file (`$git_list_cmd`) {
|
|
chomp($file);
|
|
$files_union{$file} = 1;
|
|
}
|
|
} elsif ($opt_git_diff_all) {
|
|
                # Strategy 2: Union files are all files in both repos.
|
|
foreach my $file (keys %{$repo_listing{$Left }},
|
|
keys %{$repo_listing{$Right}}) {
|
|
$files_union{$file} = 1;
|
|
}
|
|
} elsif ($opt_git_diff_simindex) {
|
|
# Strategy 3: Use git's own similarity index to figure
|
|
# out which files to compare.
|
|
git_similarity_index($Left , # in
|
|
$Right , # in
|
|
\@left_files , # out
|
|
\@right_files , # out
|
|
$ra_git_similarity); # out
|
|
|
|
}
|
|
|
|
if ($opt_exclude_list_file) {
|
|
my @reject_list = read_list_file($opt_exclude_list_file);
|
|
my %entries_to_drop = ();
|
|
foreach my $f_or_d (@reject_list) {
|
|
if (-d $f_or_d) {
|
|
# directory match
|
|
$entries_to_drop{$f_or_d} = 1;
|
|
foreach my $file (keys %files_union) {
|
|
$entries_to_drop{$file} = 1 if $file =~ m{$f_or_d/};
|
|
}
|
|
} else {
|
|
# exact match
|
|
$entries_to_drop{$f_or_d} = 1;
|
|
}
|
|
}
|
|
foreach my $file (keys %entries_to_drop) {
|
|
delete $files_union{$file};
|
|
}
|
|
}
|
|
|
|
if ($opt_list_file) {
|
|
my @include_list = read_list_file($opt_list_file);
|
|
my %entries_to_add = ();
|
|
foreach my $f_or_d (@include_list) {
|
|
if (-d $f_or_d) {
|
|
# directory match
|
|
$entries_to_add{$f_or_d} = 1;
|
|
foreach my $file (keys %files_union) {
|
|
$entries_to_add{$file} = 1 if $file =~ m{$f_or_d/};
|
|
}
|
|
} else {
|
|
# exact match
|
|
$entries_to_add{$f_or_d} = 1;
|
|
}
|
|
}
|
|
%files_union = %entries_to_add; # and exclude all others
|
|
}
|
|
|
|
# then make truncated tar files of those union files which
|
|
# actually exist in each repo
|
|
foreach my $file (sort keys %files_union) {
|
|
push @left_files , $file if $repo_listing{$Left }{$file};
|
|
push @right_files, $file if $repo_listing{$Right}{$file};
|
|
}
|
|
# backslash whitespace, weird chars within file names (#257, #284)
|
|
|
|
# my @Lfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_} @left_files;
|
|
# my @Lfiles= @left_files;
|
|
if(scalar(@left_files) > 0) {
|
|
$replacement_arg_list{$git_hash{$Left}} = git_archive($Left , \@left_files);
|
|
} else {
|
|
                # All of the changed files were newly added in the right-side
                # commit, so none of them exist in the left-side commit.
                # Create an empty tar file so the added lines of code are detected.
|
|
$replacement_arg_list{$git_hash{$Left}} = empty_tar();
|
|
}
|
|
# $replacement_arg_list{$git_hash{$Left}} = git_archive($Left , \@Lfiles);
|
|
# my @Rfiles= map {$_ =~ s/([\s\(\)\[\]{}';\^\$\?])/\\$1/g; $_} @right_files ;
|
|
# my @Rfiles= @right_files ;
|
|
#use Data::Dumper;
|
|
#print Dumper('left' , \@left_files);
|
|
#print Dumper('right', \@right_files);
|
|
#die;
|
|
|
|
if(scalar(@right_files) > 0) {
|
|
$replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@right_files);
|
|
} else {
|
|
                # All of the changed files were deleted, so none of them exist
                # in the right-side commit.
                # Create an empty tar file so the removed lines of code are detected.
|
|
$replacement_arg_list{$git_hash{$Right}} = empty_tar();
|
|
}
|
|
# $replacement_arg_list{$git_hash{$Right}} = git_archive($Right, \@Rfiles);
|
|
#write_file("/tmp/Lfiles.txt", {}, sort @Lfiles);
|
|
#write_file("/tmp/Rfiles.txt", {}, sort @Rfiles);
|
|
#write_file("/tmp/files_union.txt", {}, sort keys %files_union);
|
|
|
|
} else {
|
|
#print "A DIFF git-to-file or file-to-git Left=$Left Right=$Right\n";
|
|
# git to file/dir or file/dir to git
|
|
if ($git_hash{$Left} and $repo_listing{$Left}{$Right} ) {
|
|
#print "A DIFF 1\n";
|
|
# $Left is a git hash and $Right is a file
|
|
$replacement_arg_list{$git_hash{$Left}} = git_archive($Left, $Right);
|
|
} elsif ($git_hash{$Right} and $repo_listing{$Right}{$Left}) {
|
|
#print "A DIFF 2\n";
|
|
# $Left is a file and $Right is a git hash
|
|
$replacement_arg_list{$git_hash{$Right}} = git_archive($Right, $Left);
|
|
} elsif ($git_hash{$Left}) {
|
|
#print "A DIFF 3\n";
|
|
# assume Right is a directory; tar the entire git archive at this hash
|
|
$replacement_arg_list{$git_hash{$Left}} = git_archive($Left, "");
|
|
} else {
|
|
#print "A DIFF 4\n";
|
|
# assume Left is a directory; tar the entire git archive at this hash
|
|
$replacement_arg_list{$git_hash{$Right}} = git_archive($Right, "");
|
|
}
|
|
}
|
|
} else {
|
|
#print "B COUNT\n";
|
|
foreach my $hash (sort keys %git_hash) {
|
|
$replacement_arg_list{$git_hash{$hash}} = git_archive($hash);
|
|
}
|
|
}
|
|
# print "git_hash= "; print Dumper(\%git_hash);
|
|
#print "repo_listing= "; print Dumper(\%repo_listing);
|
|
|
|
# replace the input arg list with the new one
|
|
@{$ra_arg_list} = ();
|
|
foreach my $index (sort {$a <=> $b} keys %replacement_arg_list) {
|
|
push @{$ra_arg_list}, $replacement_arg_list{$index};
|
|
}
|
|
|
|
#print "ra_arg_list 2: @{$ra_arg_list}\n";
|
|
print "<- replace_git_hash_with_tarfile()\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub git_similarity_index { # {{{
|
|
my ($git_hash_Left , # in
|
|
$git_hash_Right , # in
|
|
$ra_left_files , # out
|
|
$ra_right_files , # out
|
|
$ra_git_similarity) = @_; # out
|
|
die "this option is not yet implemented";
|
|
print "-> git_similarity_index($git_hash_Left, $git_hash_Right)\n" if $opt_v > 2;
|
|
my $cmd = "git diff -M --name-status $git_hash_Left $git_hash_Right";
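    # -M enables rename detection; each output line is a status letter
    # (A, M, D, ...) or, for a rename, R<similarity score>, followed by
    # the affected path(s), e.g. "R097<TAB>old/name.c<TAB>new/name.c".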
|
|
print $cmd, "\n" if $opt_v;
|
|
open(GSIM, "$cmd |") or die "Unable to run $cmd $!";
|
|
while (<GSIM>) {
|
|
print "git similarity> $_";
|
|
}
|
|
close(GSIM);
|
|
print "<- git_similarity_index\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub empty_tar { # {{{1
|
|
my ($Tarfh, $Tarfile);
|
|
if ($opt_sdir) {
|
|
File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
|
|
($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar'); # delete on exit
|
|
} else {
|
|
($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => $ON_WINDOWS ? '.zip' : '.tar'); # delete on exit
|
|
}
|
|
my $cmd = $ON_WINDOWS ? "type nul > $Tarfile" : "tar -cf $Tarfile -T /dev/null";
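    # "tar -cf <file> -T /dev/null" creates a valid archive with no
    # members; on Windows, "type nul" simply creates an empty file,
    # which serves the same purpose here.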
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
if (!can_read($Tarfile)) {
|
|
# not readable
|
|
die "Failed to create empty tarfile.";
|
|
}
|
|
|
|
return $Tarfile;
|
|
} # 1}}}
|
|
sub git_archive { # {{{1
|
|
# Invoke 'git archive' as a system command to create a tar file
|
|
# using the given argument(s).
|
|
my ($A1, $A2) = @_;
|
|
print "-> git_archive($A1)\n" if $opt_v > 2;
|
|
|
|
my $args = undef;
|
|
my @File_Set = ( );
|
|
my $n_sets = 1;
|
|
if (ref $A2 eq 'ARRAY') {
|
|
        # Avoid "Argument list too long" for the git archive command
        # by splitting the inputs into sets of at most $FILES_PER_ARCHIVE
        # files (issue 273).
|
|
my $FILES_PER_ARCHIVE = 1_000;
|
|
$FILES_PER_ARCHIVE = 100 if $ON_WINDOWS; # github.com/AlDanial/cloc/issues/404
|
|
|
|
my $n_files = scalar(@{$A2});
|
|
$n_sets = $n_files/$FILES_PER_ARCHIVE;
|
|
$n_sets = 1 + int($n_sets) if $n_sets > int($n_sets);
|
|
$n_sets = 1 if !$n_sets;
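        # The three lines above compute ceil($n_files/$FILES_PER_ARCHIVE);
        # for example, 2500 files with $FILES_PER_ARCHIVE = 1000 gives 3 sets.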
|
|
foreach my $i (0..$n_sets-1) {
|
|
@{$File_Set[$i]} = ( );
|
|
my $start = $i*$FILES_PER_ARCHIVE;
|
|
my $end = smaller(($i+1)*$FILES_PER_ARCHIVE, $n_files) - 1;
|
|
# Wrap each file name in single quotes to protect spaces
|
|
# and other odd characters. File names that themselves have
|
|
# single quotes are instead wrapped in double quotes. File
|
|
# names with both single and double quotes... jeez.
|
|
foreach my $fname (@{$A2}[$start .. $end]) {
|
|
if ($fname =~ /^".*?\\".*?"$/) {
|
|
# git pre-handles filenames with double quotes by backslashing
|
|
# each double quote then surrounding entire name in double
|
|
# quotes; undo this otherwise archive command crashes
|
|
$fname =~ s/\\"/"/g;
|
|
$fname =~ s/^"(.*)"$/$1/;
|
|
} elsif ($fname =~ /'/ or $ON_WINDOWS) {
|
|
push @{$File_Set[$i]}, "\"$fname\"";
|
|
} else {
|
|
push @{$File_Set[$i]}, "'$fname'";
|
|
}
|
|
}
|
|
unshift @{$File_Set[$i]}, "$A1 "; # prepend git hash to beginning of list
|
|
|
|
##xx# # don't include \$ in the regex because git handles these correctly
|
|
# # to each word in @{$A2}[$start .. $end]: first backslash each
|
|
# # single quote, then wrap all entries in single quotes (#320)
|
|
# push @File_Set,
|
|
# "$A1 " . join(" ", map {$_ =~ s/'/\'/g; $_ =~ s/^(.*)$/'$1'/g; $_}
|
|
## "$A1 " . join(" ", map {$_ =~ s/([\s\(\)\[\]{}';\^\?])/\\$1/g; $_}
|
|
# @{$A2}[$start .. $end]);
|
|
}
|
|
} else {
|
|
if (defined $A2) {
|
|
push @{$File_Set[0]}, "$A1 $A2";
|
|
} else {
|
|
push @{$File_Set[0]}, "$A1";
|
|
}
|
|
}
|
|
|
|
my $files_this_commit = join(" ", @{$File_Set[0]});
|
|
print " git_archive(file_set[0]=$files_this_commit)\n" if $opt_v > 2;
|
|
my ($Tarfh, $Tarfile);
|
|
if ($opt_sdir) {
|
|
File::Path::mkpath($opt_sdir) unless is_dir($opt_sdir);
|
|
($Tarfh, $Tarfile) = tempfile(UNLINK => 1, DIR => $opt_sdir, SUFFIX => '.tar'); # delete on exit
|
|
} else {
|
|
($Tarfh, $Tarfile) = tempfile(UNLINK => 1, SUFFIX => '.tar'); # delete on exit
|
|
}
|
|
my $cmd = "git archive -o $Tarfile $files_this_commit";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
if (!can_read($Tarfile) or !get_size($Tarfile)) {
|
|
# not readable, or zero sized
|
|
die "Failed to create tarfile of files from git.";
|
|
}
|
|
if ($n_sets > 1) {
|
|
if ($ON_WINDOWS) {
|
|
my @tar_files = ( $Tarfile );
|
|
my $start_dir = cwd;
|
|
foreach my $i (1..$n_sets-1) {
|
|
my $fname = sprintf "%s_extra_%08d", $Tarfile, $i;
|
|
my $files_this_commit = join(" ", @{$File_Set[$i]});
|
|
my $cmd = "git archive -o $fname $files_this_commit";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
push @tar_files, $fname;
|
|
}
|
|
# Windows tar can't combine tar files so expand
|
|
# them all to one directory then re-tar
|
|
my $extract_dir = tempdir( CLEANUP => 0 ); # 1 = delete on exit
|
|
chdir "$extract_dir";
|
|
foreach my $T (@tar_files) {
|
|
next unless is_file($T) and get_size($T);
|
|
my $cmd = "tar -x -f \"$T\"";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
unlink "$T";
|
|
}
|
|
chdir "..";
|
|
$Tarfile .= ".final.tar";
|
|
my $cmd = "tar -c -f \"${Tarfile}\" \"$extract_dir\"";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
chdir "$start_dir";
|
|
} else {
|
|
foreach my $i (1..$n_sets-1) {
|
|
my $files_this_commit = join(" ", @{$File_Set[$i]});
|
|
my $cmd = "git archive -o ${Tarfile}_extra $files_this_commit";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
# and merge into the first one
|
|
$cmd = "tar -A -f ${Tarfile} ${Tarfile}_extra";
|
|
print $cmd, "\n" if $opt_v;
|
|
system $cmd;
|
|
}
|
|
unlink "${Tarfile}_extra";
|
|
}
|
|
}
|
|
print "<- git_archive() made $Tarfile\n" if $opt_v > 2;
|
|
return $Tarfile
|
|
} # 1}}}
|
|
sub smaller { # {{{1
|
|
my( $a, $b ) = @_;
|
|
return $a < $b ? $a : $b;
|
|
} # 1}}}
|
|
sub lower_on_Windows { # {{{1
|
|
# If on Unix(-like), do nothing, just return the input.
|
|
# If on Windows, return a lowercase version of the file
|
|
# and also update %upper_lower_map with this new entry.
|
|
# Needed in make_file_list() because the full file list
|
|
# isn't known until the end of that routine--where
|
|
# %upper_lower_map is ordinarily populated.
|
|
my ($path,) = @_;
|
|
return $path unless $ON_WINDOWS;
|
|
my $lower = lc $path;
|
|
$upper_lower_map{$lower} = $path;
|
|
return $lower;
|
|
}
|
|
# }}}
|
|
sub make_file_list { # {{{1
|
|
my ($ra_arg_list, # in file and/or directory names to examine
|
|
$iteration , # in 0 if only called once, 1 or 2 if twice for diff
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out errors encountered
|
|
$rh_ignored , # out files not recognized as computer languages
|
|
) = @_;
|
|
print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2;
|
|
|
|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
|
|
my ($fh, $filename);
|
|
if ($opt_categorized) {
|
|
if ($iteration) {
|
|
# am being called twice for diff of Left and Right
|
|
my $ext = $iteration == 1 ? "L" : "R";
|
|
$filename = $opt_categorized . "-$ext";
|
|
} else {
|
|
$filename = $opt_categorized;
|
|
}
|
|
$fh = open_file('+>', $filename, 1); # open for read/write
|
|
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
} elsif ($opt_sdir) {
|
|
# write to the user-defined scratch directory
|
|
++$TEMP_OFF;
|
|
my $scr_dir = "$opt_sdir/$TEMP_OFF";
|
|
File::Path::mkpath($scr_dir) unless is_dir($scr_dir);
|
|
$filename = $scr_dir . '/cloc_file_list.txt';
|
|
$fh = open_file('+>', $filename, 1); # open for read/write
|
|
die "Unable to write to $filename: $!\n" unless defined $fh;
|
|
} else {
|
|
# let File::Temp create a suitable temporary file
|
|
($fh, $filename) = tempfile(UNLINK => 1); # delete file on exit
|
|
print "Using temp file list [$filename]\n" if $opt_v;
|
|
}
|
|
|
|
my @dir_list = ();
|
|
foreach my $file_or_dir (@{$ra_arg_list}) {
|
|
my $size_in_bytes = 0;
|
|
my $F = lower_on_Windows($file_or_dir);
|
|
my $ul_F = $ON_WINDOWS ? $upper_lower_map{$F} : $F;
|
|
if (!can_read($file_or_dir)) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
|
|
next;
|
|
}
|
|
if (is_file($file_or_dir)) {
|
|
if (!get_size($file_or_dir)) { # 0 sized file, named pipe, socket
|
|
$rh_ignored->{$F} = 'zero sized file';
|
|
next;
|
|
} elsif (is_binary($file_or_dir) and !$opt_read_binary_files) {
|
|
# avoid binary files unless user insists on reading them
|
|
if ($opt_unicode) {
|
|
# only ignore if not a Unicode file w/trivial
|
|
# ASCII transliteration
|
|
if (!unicode_file($file_or_dir)) {
|
|
$rh_ignored->{$ul_F} = 'binary file';
|
|
next;
|
|
}
|
|
} else {
|
|
$rh_ignored->{$ul_F} = 'binary file';
|
|
next;
|
|
}
|
|
}
|
|
push @file_list, "$file_or_dir";
|
|
} elsif (is_dir($file_or_dir)) {
|
|
push @dir_list, $file_or_dir;
|
|
} else {
|
|
push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $F];
|
|
$rh_ignored->{$F} = 'not file, not directory';
|
|
}
|
|
}
|
|
|
|
# apply exclusion rules to file names passed in on the command line
|
|
my @new_file_list = ();
|
|
foreach my $File (@file_list) {
|
|
my ($volume, $directories, $filename) = File::Spec->splitpath( $File );
|
|
my $ignore_this_file = 0;
|
|
foreach my $Sub_Dir ( File::Spec->splitdir($directories) ) {
|
|
my $SD = lower_on_Windows($Sub_Dir);
|
|
if ($Exclude_Dir{$Sub_Dir}) {
|
|
$Ignored{$SD} = "($File) --exclude-dir=$Sub_Dir";
|
|
$ignore_this_file = 1;
|
|
last;
|
|
}
|
|
}
|
|
push @new_file_list, $File unless $ignore_this_file;
|
|
}
|
|
@file_list = @new_file_list;
|
|
foreach my $dir (@dir_list) {
|
|
my $D = lower_on_Windows($dir);
|
|
#print "make_file_list dir=$dir Exclude_Dir{$dir}=$Exclude_Dir{$dir}\n";
|
|
# populates global variable @file_list
|
|
if ($Exclude_Dir{$dir}) {
|
|
$Ignored{$D} = "--exclude-dir=$Exclude_Dir{$dir}";
|
|
next;
|
|
}
|
|
if ($opt_no_recurse) {
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
my $d = Win32::LongPath->new();
|
|
$d->opendirL($dir);
|
|
foreach my $entry ($d->readdirL()) {
|
|
my $F = "$dir/$entry";
|
|
push @file_list, $F if is_file($F);
|
|
}
|
|
$d->closedirL();
|
|
} else {
|
|
opendir(DIR, $dir);
|
|
push @file_list, grep(is_file($_), readdir(DIR));
|
|
closedir(DIR);
|
|
}
|
|
} else {
|
|
find({wanted => \&files ,
|
|
preprocess => \&find_preprocessor,
|
|
follow => $opt_follow_links }, $dir);
|
|
}
|
|
}
|
|
if ($opt_follow_links) {
|
|
        # Giving { 'follow' => 1 } to find() makes it skip the call
        # to find_preprocessor(), so it has to be called manually here.
|
|
@file_list = manual_find_preprocessor(@file_list);
|
|
}
|
|
|
|
# there's a possibility of file duplication if user provided a list
|
|
# file or --vcs command that returns directory names; squash these
|
|
my %unique_file_list = map { $_ => 1 } @file_list;
|
|
@file_list = sort keys %unique_file_list;
|
|
|
|
$nFiles_Found = scalar @file_list;
|
|
printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet;
|
|
write_file($opt_found, {}, sort @file_list) if $opt_found;
|
|
|
|
my $nFiles_Categorized = 0;
|
|
|
|
foreach my $file (@file_list) {
|
|
my $F = lower_on_Windows($file);
|
|
if ($ON_WINDOWS) {
|
|
(my $lc = lc $file) =~ s{\\}{/}g;
|
|
$upper_lower_map{$lc} = $file;
|
|
$file = $lc;
|
|
}
|
|
printf "classifying $file\n" if $opt_v > 2;
|
|
|
|
my $basename = basename $file;
|
|
if ($Not_Code_Filename{$basename}) {
|
|
$rh_ignored->{$F} = "listed in " . '$' .
|
|
"Not_Code_Filename{$basename}";
|
|
next;
|
|
} elsif ($basename =~ m{~$}) {
|
|
$rh_ignored->{$F} = "temporary editor file";
|
|
next;
|
|
}
|
|
|
|
my $size_in_bytes;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
my $stats = statL($file);
|
|
$size_in_bytes = $stats->{size} if defined $stats;
|
|
} else {
|
|
$size_in_bytes = (stat $file)[7];
|
|
}
|
|
my $language = "";
|
|
if ($All_One_Language) {
|
|
            # The user overrode auto-language detection by using
            # --force-lang with just a language name (no extension).
|
|
$language = $All_One_Language;
|
|
} else {
|
|
$language = classify_file($file ,
|
|
$rh_Err ,
|
|
$raa_errors,
|
|
$rh_ignored);
|
|
}
|
|
if (!defined $size_in_bytes) {
|
|
$rh_ignored->{$F} = "no longer readable";
|
|
next;
|
|
} elsif (!defined $language) {
|
|
$rh_ignored->{$F} = "unable to associate with a language";
|
|
next;
|
|
} elsif ($language eq "(unknown)") {
|
|
# entry should already be in %{$rh_ignored}
|
|
next;
|
|
}
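        # Record this file in the working list in the form
        #     <size in bytes><separator><language><separator><file path>
        # which remove_duplicate_files() later reads back.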
|
|
printf $fh "%d%s%s%s%s\n", $size_in_bytes, $separator,
|
|
$language, $separator, $file;
|
|
++$nFiles_Categorized;
|
|
#printf "classified %d files\n", $nFiles_Categorized
|
|
# unless (!$opt_progress_rate or
|
|
# ($nFiles_Categorized % $opt_progress_rate));
|
|
}
|
|
printf "classified %d files\r", $nFiles_Categorized
|
|
if !$opt_quiet and $nFiles_Categorized > 1;
|
|
print "<- make_file_list()\n" if $opt_v > 2;
|
|
|
|
return $fh; # handle to the file containing the list of files to process
|
|
} # 1}}}
|
|
sub invoke_generator { # {{{1
|
|
my ($generator, $ra_user_inputs) = @_;
|
|
# If user provided file/directory inputs, only return
|
|
# generated files that are in user's request.
|
|
# Populates global variable %Ignored.
|
|
print "-> invoke_generator($generator, @{$ra_user_inputs})\n" if $opt_v > 2;
|
|
my $start_dir = cwd();
|
|
my @dir_list = ();
|
|
my @file_list = ();
|
|
|
|
if (!@{$ra_user_inputs}) {
|
|
        # The input must be a generator command, e.g. "find -type f -name '*.c'",
        # issued from the cwd.
|
|
push @dir_list, ".";
|
|
}
|
|
foreach my $file_dir (@{$ra_user_inputs}) {
|
|
if (is_dir($file_dir)) {
|
|
push @dir_list, $file_dir;
|
|
} elsif (is_file($file_dir)) {
|
|
push @file_list, $file_dir;
|
|
}
|
|
}
|
|
|
|
my @files = ();
|
|
foreach my $work_dir (@dir_list) {
|
|
if ($work_dir ne $start_dir) {
|
|
chdir $work_dir or die "Failed to chdir to $work_dir: $!";
|
|
}
|
|
open(FH, "$generator |") or die "Failed to pipe $generator: $!";
|
|
while(<FH>) {
|
|
chomp;
|
|
my $F = "$work_dir/$_";
|
|
print "VCS input: $F\n" if $opt_v >= 2;
|
|
if (!@file_list) {
|
|
# no files given on the command line; accept all
|
|
push @files, $F;
|
|
} else {
|
|
# is this file desired?
|
|
my $want_this_one = 0;
|
|
foreach my $file (@file_list) {
|
|
$file =~ s{\\}{/}g if $ON_WINDOWS;
|
|
if (/^$file/) {
|
|
$want_this_one = 1;
|
|
last;
|
|
}
|
|
}
|
|
push @files, $F if $want_this_one;
|
|
}
|
|
}
|
|
close(FH);
|
|
if ($work_dir ne $start_dir) {
|
|
chdir $start_dir or die "Failed to chdir to $start_dir: $!";
|
|
}
|
|
}
|
|
|
|
# apply match/not-match file/dir filters to the list so far
|
|
my @post_filter = ();
|
|
foreach my $F (@files) {
|
|
if ($opt_match_f) {
|
|
push @post_filter, $F if basename($F) =~ m{$opt_match_f};
|
|
next;
|
|
}
|
|
if ($opt_match_d) {
|
|
push @post_filter, $F if $F =~ m{$opt_match_d};
|
|
next;
|
|
}
|
|
if (@opt_not_match_d) {
|
|
my $rule;
|
|
if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_d)) {
|
|
$Ignored{$F} = "--not-match-d=$rule";
|
|
next;
|
|
} elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_d)) {
|
|
$Ignored{$F} = "--not-match-d (basename) =$rule";
|
|
next;
|
|
}
|
|
}
|
|
if (@opt_not_match_f) {
|
|
my $rule;
|
|
if ($opt_fullpath and any_match($F, 0, \$rule, @opt_not_match_f)) {
|
|
$Ignored{$F} = "--not-match-f=$rule";
|
|
next;
|
|
} elsif (!$opt_fullpath and any_match(basename($F), 0, \$rule, @opt_not_match_f)) {
|
|
$Ignored{$F} = "--not-match-f (basename) =$rule";
|
|
next;
|
|
}
|
|
}
|
|
my $nBytes = get_size($F);
|
|
if (!$nBytes) {
|
|
$Ignored{$F} = 'zero sized file';
|
|
printf "files(%s) zero size\n", $F if $opt_v > 5;
|
|
}
|
|
next unless $nBytes;
|
|
if ($nBytes > $opt_max_file_size*1024**2) {
|
|
$Ignored{$F} = "file size of " .
|
|
$nBytes/1024**2 . " MB exceeds max file size of " .
|
|
"$opt_max_file_size MB";
|
|
printf "file(%s) exceeds $opt_max_file_size MB\n",
|
|
$F if $opt_v > 5;
|
|
next;
|
|
}
|
|
my $is_bin = is_binary($F);
|
|
printf "files(%s) size=%d -B=%d\n",
|
|
$F, $nBytes, $is_bin if $opt_v > 5;
|
|
$is_bin = 0 if $opt_unicode and unicode_file($_);
|
|
$is_bin = 0 if $opt_read_binary_files;
|
|
next if $is_bin;
|
|
push @post_filter, $F;
|
|
}
|
|
print "<- invoke_generator\n" if $opt_v > 2;
|
|
return @post_filter;
|
|
} # 1}}}
|
|
sub any_match { # {{{1
|
|
my ($string, $entire, $rs_matched_pattern, @patterns) = @_;
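    # If $entire is true the pattern must match the whole string;
    # otherwise a substring match suffices.  The first pattern that
    # matches is passed back through $rs_matched_pattern.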
|
|
print "-> any_match($string, $entire)\n" if $opt_v > 2;
|
|
foreach my $pattern (@patterns) {
|
|
if ($entire) {
|
|
if ($string =~ m{^${pattern}$}) {
|
|
${$rs_matched_pattern} = $pattern;
|
|
print "<- any_match(1)\n" if $opt_v > 2;
|
|
return 1;
|
|
}
|
|
} else {
|
|
if ($string =~ m{$pattern}) {
|
|
${$rs_matched_pattern} = $pattern;
|
|
print "<- any_match(1)\n" if $opt_v > 2;
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
print "<- any_match(0)\n" if $opt_v > 2;
|
|
return 0;
|
|
}
|
|
# }}}
|
|
sub remove_duplicate_files { # {{{1
|
|
my ($fh , # in
|
|
$rh_Language , # out
|
|
$rh_unique_source_file, # out
|
|
$rh_Err , # in
|
|
$raa_errors , # out errors encountered
|
|
$rh_ignored , # out
|
|
) = @_;
|
|
|
|
# Check for duplicate files by comparing file sizes.
|
|
# Where files are equally sized, compare their MD5 checksums.
|
|
print "-> remove_duplicate_files\n" if $opt_v > 2;
|
|
|
|
my $separator = defined $opt_csv_delimiter ? $opt_csv_delimiter : ",";
|
|
my $n = 0;
|
|
my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ]
|
|
seek($fh, 0, 0); # rewind to beginning of the temp file
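    # Each record written by make_file_list() has the form
    #     <size in bytes><separator><language><separator><file path>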
|
|
while (<$fh>) {
|
|
++$n;
|
|
my ($size_in_bytes, $language, $file) = split(/\Q$separator\E/, $_, 3);
|
|
if (!defined($size_in_bytes) or
|
|
!defined($language) or
|
|
!defined($file)) {
|
|
print "-> remove_duplicate_files skipping error line [$_]\n"
|
|
if $opt_v;
|
|
next;
|
|
}
|
|
chomp($file);
|
|
$file =~ s{\\}{/}g if $ON_WINDOWS;
|
|
$rh_Language->{$file} = $language;
|
|
push @{$files_by_size{$size_in_bytes}}, $file;
|
|
if ($opt_skip_uniqueness) {
|
|
$rh_unique_source_file->{$file} = 1;
|
|
}
|
|
}
|
|
return if $opt_skip_uniqueness;
|
|
if ($opt_progress_rate and ($n > $opt_progress_rate)) {
|
|
printf "Duplicate file check %d files (%d known unique)\r",
|
|
$n, scalar keys %files_by_size;
|
|
}
|
|
$n = 0;
|
|
foreach my $bytes (sort {$a <=> $b} keys %files_by_size) {
|
|
++$n;
|
|
printf "Unique: %8d files \r",
|
|
$n unless (!$opt_progress_rate or ($n % $opt_progress_rate));
|
|
if (scalar @{$files_by_size{$bytes}} == 1) {
|
|
# only one file is this big; must be unique
|
|
$rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1;
|
|
next;
|
|
} else {
|
|
#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n";
|
|
# Files in the list @{$files_by_size{$bytes} all are
|
|
# $bytes long. Sort the list by file basename.
|
|
|
|
# # sorting on basename causes repeatability problems
|
|
# # if the basename is not unique (eg "includeA/x.h"
|
|
# # and "includeB/x.h". Instead, sort on full path.
|
|
# # Ref bug #114.
|
|
# my @sorted_bn = ();
|
|
# my %BN = map { basename($_) => $_ } @{$files_by_size{$bytes}};
|
|
# foreach my $F (sort keys %BN) {
|
|
# push @sorted_bn, $BN{$F};
|
|
# }
|
|
|
|
my @sorted_bn = sort @{$files_by_size{$bytes}};
|
|
|
|
foreach my $F (different_files(\@sorted_bn ,
|
|
$rh_Err ,
|
|
$raa_errors ,
|
|
$rh_ignored ) ) {
|
|
$rh_unique_source_file->{$F} = 1;
|
|
}
|
|
}
|
|
}
|
|
print "<- remove_duplicate_files\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub manual_find_preprocessor { # {{{1
|
|
# When running with --follow-links, find_preprocessor() is not
|
|
# called by find(). Have to do it manually. Inputs here
|
|
# are only files, which differs from find_preprocessor() which
|
|
# gets directories too.
|
|
# Reads global variable %Exclude_Dir.
|
|
# Populates global variable %Ignored.
|
|
# Reject files/directories in cwd which are in the exclude list.
|
|
print "-> manual_find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
|
|
my @ok = ();
|
|
|
|
foreach my $File (@_) { # pure file or directory name, no separators
|
|
my $Dir = dirname($File);
|
|
my $got_exclude_dir = 0;
|
|
foreach my $d (File::Spec->splitdir( $Dir )) {
|
|
# tests/inputs/issues/407/level2/level/Test/level2 ->
|
|
# $d iterates over tests, inputs, issues, 407,
|
|
# level2, level, Test, level2
|
|
# check every item against %Exclude_Dir
|
|
if ($Exclude_Dir{$d}) {
|
|
$got_exclude_dir = $d;
|
|
last;
|
|
}
|
|
}
|
|
if ($got_exclude_dir) {
|
|
$Ignored{$File} = "--exclude-dir=$Exclude_Dir{$got_exclude_dir}";
|
|
#print "ignoring $File\n";
|
|
} else {
|
|
if (@opt_not_match_d) {
|
|
my $rule;
|
|
if ($opt_fullpath) {
|
|
if (any_match($Dir, 1, \$rule, @opt_not_match_d)) {
|
|
$Ignored{$File} = "--not-match-d=$rule";
|
|
#print "matched fullpath\n"
|
|
} else {
|
|
push @ok, $File;
|
|
}
|
|
} elsif (any_match(basename($Dir), 0, \$rule, @opt_not_match_d)) {
|
|
$Ignored{$File} = "--not-match-d=$rule";
|
|
#print "matched partial\n"
|
|
} else {
|
|
push @ok, $File;
|
|
}
|
|
} else {
|
|
push @ok, $File;
|
|
}
|
|
}
|
|
}
|
|
|
|
print "<- manual_find_preprocessor(@ok)\n" if $opt_v > 2;
|
|
return @ok;
|
|
} # 1}}}
|
|
sub find_preprocessor { # {{{1
|
|
# invoked by File::Find's find() each time it enters a new directory
|
|
# Reads global variable %Exclude_Dir.
|
|
# Populates global variable %Ignored.
|
|
# Reject files/directories in cwd which are in the exclude list.
|
|
print "-> find_preprocessor(", cwd(), ")\n" if $opt_v > 2;
|
|
my @ok = ();
|
|
|
|
foreach my $F_or_D (@_) { # pure file or directory name, no separators
|
|
next if $F_or_D =~ /^\.{1,2}$/; # skip . and ..
|
|
if ($Exclude_Dir{$F_or_D}) {
|
|
$Ignored{$File::Find::name} = "--exclude-dir=$Exclude_Dir{$F_or_D}";
|
|
} else {
|
|
#printf " F_or_D=%-20s File::Find::name=%s\n", $F_or_D, $File::Find::name;
|
|
if (@opt_not_match_d) {
|
|
my $rule;
|
|
if ($opt_fullpath) {
|
|
if (any_match($File::Find::name, 0, \$rule, @opt_not_match_d)) {
|
|
$Ignored{$File::Find::name} = "--not-match-d=$rule";
|
|
} else {
|
|
push @ok, $F_or_D;
|
|
}
|
|
} elsif (!is_dir($F_or_D) and
|
|
any_match($File::Find::name, 0, \$rule,
|
|
@opt_not_match_d)) {
|
|
$Ignored{$File::Find::name} = "--not-match-d (basename) =$rule";
|
|
} else {
|
|
push @ok, $F_or_D;
|
|
}
|
|
} else {
|
|
push @ok, $F_or_D;
|
|
}
|
|
}
|
|
}
|
|
|
|
print "<- find_preprocessor(@ok)\n" if $opt_v > 2;
|
|
return @ok;
|
|
} # 1}}}
|
|
sub files { # {{{1
|
|
# invoked by File::Find's find() Populates global variable @file_list.
|
|
# See also find_preprocessor() which prunes undesired directories.
|
|
|
|
# my $Dir = fastcwd(); # fully qualified path -- problematic
|
|
my $Dir = $File::Find::dir; # path anchored to cloc's cwd
|
|
my $rule;
|
|
if ($opt_fullpath) {
|
|
# look at as much of the path as is known
|
|
if ($opt_match_f ) {
|
|
return unless $File::Find::name =~ m{$opt_match_f};
|
|
}
|
|
if (@opt_not_match_f) {
|
|
return if any_match($File::Find::name, 0, \$rule, @opt_not_match_f);
|
|
}
|
|
} else {
|
|
# only look at the basename
|
|
if ($opt_match_f ) { return unless /$opt_match_f/; }
|
|
if (@opt_not_match_f) { return if any_match($_, 0, \$rule, @opt_not_match_f)}
|
|
}
|
|
if ($opt_match_d) {
|
|
return unless "$Dir/" =~ m{$opt_match_d} or $Dir =~ m{$opt_match_d};
|
|
}
|
|
|
|
my $nBytes = get_size($_);
|
|
if (!$nBytes and is_file($File::Find::name)) {
|
|
$Ignored{$File::Find::name} = 'zero sized file';
|
|
printf "files(%s) zero size\n", $File::Find::name if $opt_v > 5;
|
|
}
|
|
return unless $nBytes ; # attempting other tests w/pipe or socket will hang
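    # $opt_max_file_size is given in MB; compare against the size in bytes.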
|
|
if ($nBytes > $opt_max_file_size*1024**2) {
|
|
$Ignored{$File::Find::name} = "file size of " .
|
|
$nBytes/1024**2 . " MB exceeds max file size of " .
|
|
"$opt_max_file_size MB";
|
|
printf "file(%s) exceeds $opt_max_file_size MB\n",
|
|
$File::Find::name if $opt_v > 5;
|
|
return;
|
|
}
|
|
my $is_dir = is_dir($_);
|
|
my $is_bin = is_binary($_);
|
|
printf "files(%s) size=%d is_dir=%d -B=%d\n",
|
|
$File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5;
|
|
$is_bin = 0 if $opt_unicode and unicode_file($_);
|
|
$is_bin = 0 if $opt_read_binary_files;
|
|
if ($is_bin and !$is_dir) {
|
|
$Ignored{$File::Find::name} = "binary file";
|
|
printf "files(%s) binary file\n", $File::Find::name if $opt_v > 5;
|
|
}
|
|
return if $is_dir or $is_bin;
|
|
++$nFiles_Found;
|
|
printf "%8d files\r", $nFiles_Found
|
|
unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate));
|
|
push @file_list, $File::Find::name;
|
|
} # 1}}}
|
|
sub archive_files { # {{{1
|
|
# invoked by File::Find's find() Populates global variable @binary_archive
|
|
foreach my $ext (keys %Known_Binary_Archives) {
|
|
push @binary_archive, $File::Find::name
|
|
if $File::Find::name =~ m{$ext$};
|
|
}
|
|
} # 1}}}
|
|
sub open_file { # {{{1
|
|
# portable method to open a file. On Windows this uses Win32::LongPath to
|
|
# allow reading/writing files past the 255 char path length limit. When on
|
|
# other operating systems, $use_new_file can be used to specify opening a
|
|
# file with `new IO::File` instead of `open`. Note: `openL` doesn't support
|
|
# the C-like fopen modes ("w", "r+", etc.), it only supports Perl mode
|
|
# strings (">", "+<", etc.). So be sure to only use Perl mode strings to
|
|
# ensure compatibility. Additionally, openL doesn't handle pipe modes; if
|
|
# you need to open a pipe/STDIN/STDOUT, use the native `open` function.
|
|
my ($mode, # Perl file mode; can not be C-style file mode
|
|
$filename, # filename to open
|
|
$use_new_file, # whether to use `new IO::File` or `open` when not using Win32::LongPath
|
|
) = @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
my $file = undef;
|
|
openL(\$file, $mode, $filename);
|
|
return $file;
|
|
} elsif ($use_new_file) {
|
|
return new IO::File $filename, $mode;
|
|
}
|
|
my $file = undef;
|
|
open($file, $mode, $filename);
|
|
return $file;
|
|
} # 1}}}
|
|
sub unlink_file { # {{{1
|
|
# portable method to unlink a file. On Windows this uses Win32::LongPath to
|
|
# allow unlinking files past the 255 char path length limit. Otherwise, the
|
|
# native `unlink` will be used.
|
|
my $filename = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
return unlinkL($filename);
|
|
}
|
|
return unlink $filename;
|
|
} # 1}}}
|
|
sub is_binary { # {{{1
|
|
# portable method to test if item is a binary file. For Windows,
|
|
# Win32::LongPath doesn't provide a testL option for -B, but -B
|
|
# accepts a filehandle which does work with files opened with openL.
|
|
my $item = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
my $IN = open_file('<', $item, 0);
|
|
if (defined $IN) {
|
|
my $res = -B $IN;
|
|
close($IN);
|
|
return $res;
|
|
}
|
|
return;
|
|
}
|
|
return (-B $item);
|
|
} # 1}}}
|
|
sub can_read { # {{{1
|
|
# portable method to test if item can be read
|
|
my $item = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
return testL('r', $item);
|
|
}
|
|
return (-r $item);
|
|
} # 1}}}
|
|
sub get_size { # {{{1
|
|
# portable method to get size in bytes of a file
|
|
my $filename = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
return testL('s', $filename);
|
|
}
|
|
return (-s $filename);
|
|
} # 1}}}
|
|
sub is_file { # {{{1
|
|
# portable method to test if item is a file
|
|
# (-f doesn't work in ActiveState Perl on Windows)
|
|
my $item = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
return testL('f', $item);
|
|
}
|
|
return (-f $item);
|
|
|
|
# Was:
|
|
####if ($ON_WINDOWS) {
|
|
#### my $mode = (stat $item)[2];
|
|
#### $mode = 0 unless $mode;
|
|
#### if ($mode & 0100000) { return 1; }
|
|
#### else { return 0; }
|
|
####} else {
|
|
#### return (-f $item); # works on Unix, Linux, CygWin, z/OS
|
|
####}
|
|
} # 1}}}
|
|
sub is_dir { # {{{1
|
|
my $item = shift @_;
|
|
if ($ON_WINDOWS and $HAVE_Win32_Long_Path) {
|
|
return testL('d', $item);
|
|
}
|
|
return (-d $item); # should work everywhere now (July 2017)
|
|
|
|
# Was:
|
|
##### portable method to test if item is a directory
|
|
##### (-d doesn't work in older versions of ActiveState Perl on Windows)
|
|
|
|
####if ($ON_WINDOWS) {
|
|
#### my $mode = (stat $item)[2];
|
|
#### $mode = 0 unless $mode;
|
|
#### if ($mode & 0040000) { return 1; }
|
|
#### else { return 0; }
|
|
####} else {
|
|
#### return (-d $item); # works on Unix, Linux, CygWin, z/OS
|
|
####}
|
|
} # 1}}}
|
|
sub is_excluded { # {{{1
|
|
my ($file , # in
|
|
$excluded , # in hash of excluded directories
|
|
) = @_;
|
|
my($filename, $filepath, $suffix) = fileparse($file);
|
|
foreach my $path (sort keys %{$excluded}) {
|
|
return 1 if ($filepath =~ m{^\Q$path\E/}i);
|
|
}
|
|
} # 1}}}
|
|
sub classify_file { # {{{1
|
|
my ($full_file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rh_ignored , # out
|
|
) = @_;
|
|
|
|
print "-> classify_file($full_file)\n" if $opt_v > 2;
|
|
my $language = "(unknown)";
|
|
|
|
if (basename($full_file) eq "-" && defined $opt_stdin_name) {
|
|
$full_file = $opt_stdin_name;
|
|
}
|
|
|
|
my $look_at_first_line = 0;
|
|
my $file = basename $full_file;
|
|
if ($opt_autoconf and $file =~ /\.in$/) {
|
|
$file =~ s/\.in$//;
|
|
}
|
|
return $language if $Not_Code_Filename{$file}; # (unknown)
|
|
return $language if $file =~ m{~$}; # a temp edit file (unknown)
|
|
if (defined $Language_by_File{$file}) {
|
|
if ($Language_by_File{$file} eq "Ant/XML") {
|
|
return Ant_or_XML( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_File{$file} eq "Maven/XML") {
|
|
return Maven_or_XML($full_file, $rh_Err, $raa_errors);
|
|
} else {
|
|
return $Language_by_File{$file};
|
|
}
|
|
}
|
|
|
|
if ($file =~ /\.([^\.]+)$/) { # has an extension
|
|
print "$full_file extension=[$1]\n" if $opt_v > 2;
|
|
my $extension = $1;
|
|
# Windows file names are case insensitive so map
|
|
# all extensions to lowercase there.
|
|
$extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
|
|
my @extension_list = ( $extension );
|
|
if ($file =~ /\.([^\.]+\.[^\.]+)$/) { # has a double extension
|
|
my $extension = $1;
|
|
$extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
|
|
unshift @extension_list, $extension; # examine double ext first
|
|
}
|
|
if ($file =~ /\.([^\.]+\.[^\.]+\.[^\.]+)$/) { # has a triple extension
|
|
my $extension = $1;
|
|
$extension = lc $extension if $ON_WINDOWS or $opt_ignore_case_ext;
|
|
unshift @extension_list, $extension; # examine triple ext first
|
|
}
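        # Longer (multi-part) extensions are examined first; for example,
        # "archive.tar.xz" is checked as "tar.xz" before "xz".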
|
|
foreach my $extension (@extension_list) {
|
|
if ($Not_Code_Extension{$extension} and
|
|
!$Forced_Extension{$extension}) {
|
|
# If .1 (for example) is an extension that would ordinarily be
|
|
# ignored but the user has insisted this be counted with the
|
|
# --force-lang option, then go ahead and count it.
|
|
$rh_ignored->{$full_file} =
|
|
'listed in $Not_Code_Extension{' . $extension . '}';
|
|
return $language;
|
|
}
|
|
# handle extension collisions
|
|
if (defined $Language_by_Extension{$extension}) {
|
|
if ($Language_by_Extension{$extension} eq
|
|
'MATLAB/Mathematica/Objective-C/MUMPS/Mercury') {
|
|
my $lang_M_or_O = "";
|
|
matlab_or_objective_C($full_file ,
|
|
$rh_Err ,
|
|
$raa_errors,
|
|
\$lang_M_or_O);
|
|
if ($lang_M_or_O) {
|
|
return $lang_M_or_O;
|
|
} else { # an error happened in matlab_or_objective_C()
|
|
$rh_ignored->{$full_file} =
|
|
'failure in matlab_or_objective_C($full_file)';
|
|
return $language; # (unknown)
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal/Fortran') {
|
|
my $lang_F_or_P_or_P = "";
|
|
php_pascal_or_fortran($full_file ,
|
|
$rh_Err ,
|
|
$raa_errors,
|
|
\$lang_F_or_P_or_P);
|
|
if ($lang_F_or_P_or_P) {
|
|
return $lang_F_or_P_or_P;
|
|
} else { # an error happened in php_pascal_or_fortran()
|
|
$rh_ignored->{$full_file} =
|
|
'failure in php_pascal_or_fortran($full_file)';
|
|
return $language; # (unknown)
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'Pascal/Puppet') {
|
|
my $lang_Pasc_or_Pup = "";
|
|
pascal_or_puppet( $full_file ,
|
|
$rh_Err ,
|
|
$raa_errors,
|
|
\$lang_Pasc_or_Pup);
|
|
if ($lang_Pasc_or_Pup) {
|
|
return $lang_Pasc_or_Pup;
|
|
} else { # an error happened in pascal_or_puppet()
|
|
$rh_ignored->{$full_file} =
|
|
'failure in pascal_or_puppet()';
|
|
return $language; # (unknown)
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'Lisp/OpenCL') {
|
|
return Lisp_or_OpenCL($full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Lisp/Julia') {
|
|
return Lisp_or_Julia( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Perl/Prolog') {
|
|
return Perl_or_Prolog($full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Raku/Prolog') {
|
|
return Raku_or_Prolog($full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq
|
|
'IDL/Qt Project/Prolog/ProGuard') {
|
|
return IDL_or_QtProject($full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'D/dtrace') {
|
|
# is it D or an init.d shell script?
|
|
my $a_script = really_is_D($full_file, $rh_Err, $raa_errors);
|
|
if ($a_script) {
|
|
# could be dtrace, sh, bash or anything one would
|
|
# write an init.d script in
|
|
if (defined $Language_by_Script{$a_script}) {
|
|
return $Language_by_Script{$a_script};
|
|
} else {
|
|
$rh_ignored->{$full_file} =
|
|
"Unrecognized script language, '$a_script'";
|
|
}
|
|
} else {
|
|
return 'D';
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'Fortran 77/Forth') {
|
|
return Forth_or_Fortran($full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'F#/Forth') {
|
|
return Forth_or_Fsharp( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Verilog-SystemVerilog/Coq') {
|
|
return Verilog_or_Coq( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Smarty') {
|
|
if ($extension ne "tpl") {
|
|
# unambiguous -- if ends with .smarty, is Smarty
|
|
return $Language_by_Extension{$extension};
|
|
}
|
|
# Smarty extension .tpl is generic; make sure the
|
|
# file at least roughly resembles PHP. Alternatively,
|
|
# if the user forces the issue, do the count.
|
|
my $force_smarty = 0;
|
|
foreach (@opt_force_lang) {
|
|
if (lc($_) eq "smarty,tpl") {
|
|
$force_smarty = 1;
|
|
last;
|
|
}
|
|
}
|
|
if (really_is_smarty($full_file) or $force_smarty) {
|
|
return 'Smarty';
|
|
} else {
|
|
return $language; # (unknown)
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'TypeScript/Qt Linguist') {
|
|
return TypeScript_or_QtLinguist( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Qt/Glade') {
|
|
return Qt_or_Glade( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'C#/Smalltalk') {
|
|
my $L = Csharp_or_Smalltalk( $full_file, $rh_Err, $raa_errors);
|
|
if ($L eq 'C#') {
|
|
my $lines = first_line($full_file, 2, $rh_Err, $raa_errors);
|
|
$lines =~ s/\n//mg;
|
|
if ($lines =~ m[//\s+<auto-generated>]) {
|
|
$L = "C# Generated";
|
|
if ($opt_no_autogen) {
|
|
$rh_ignored->{$full_file} =
|
|
'--no-autogen ignores this auto-generated C# file';
|
|
$L = "(unknown)"; # forces it to be ignored
|
|
}
|
|
}
|
|
}
|
|
return $L;
|
|
} elsif ($Language_by_Extension{$extension} eq 'Scheme/SaltStack') {
|
|
return Scheme_or_SaltStack( $full_file, $rh_Err, $raa_errors);
|
|
} elsif ($Language_by_Extension{$extension} eq 'Visual Basic/TeX/Apex Class') {
|
|
my $lang_VB_T_A = "";
|
|
Visual_Basic_or_TeX_or_Apex($full_file ,
|
|
$rh_Err ,
|
|
$raa_errors,
|
|
\$lang_VB_T_A);
|
|
if ($lang_VB_T_A) {
|
|
return $lang_VB_T_A;
|
|
} else { # an error happened in Visual_Basic_or_TeX_or_Apex
|
|
$rh_ignored->{$full_file} =
|
|
'failure in Visual_Basic_or_TeX_or_Apex()';
|
|
return $language; # (unknown)
|
|
}
|
|
} elsif ($Language_by_Extension{$extension} eq 'Brainfuck') {
|
|
if (really_is_bf($full_file)) {
|
|
return $Language_by_Extension{$extension};
|
|
} else {
|
|
return $language; # (unknown)
|
|
}
|
|
} else {
|
|
return $Language_by_Extension{$extension};
|
|
}
|
|
} else { # has an unmapped file extension
|
|
$look_at_first_line = 1;
|
|
}
|
|
}
|
|
# if all else fails look at the prefix instead of extension
|
|
( my $stem = $file ) =~ s/^(.*?)\.\S+$/$1/;
|
|
if ($stem and defined($Language_by_Prefix{$stem})) {
|
|
return $Language_by_Prefix{$stem}
|
|
}
|
|
} elsif (defined $Language_by_File{lc $file}) {
|
|
return $Language_by_File{lc $file};
|
|
} elsif ($opt_lang_no_ext and
|
|
defined $Filters_by_Language{$opt_lang_no_ext}) {
|
|
return $opt_lang_no_ext;
|
|
} else { # no file extension
|
|
$look_at_first_line = 1;
|
|
}
|
|
|
|
if ($look_at_first_line) {
|
|
# maybe it is a shell/Perl/Python/Ruby/etc script that
|
|
# starts with pound bang:
|
|
# #!/usr/bin/perl
|
|
# #!/usr/bin/env perl
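# Worked example (added for clarity; values are illustrative): for either
# shebang form shown above, peek_at_first_line() is expected to return
#     ("perl", "#!/usr/bin/env perl")
# and the interpreter name "perl" is then mapped through
# %Language_by_Script below.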
|
|
my ($script_language, $L) = peek_at_first_line($full_file ,
|
|
$rh_Err ,
|
|
$raa_errors);
|
|
if (!$script_language) {
|
|
$rh_ignored->{$full_file} = "language unknown (#2)";
|
|
# returns (unknown)
|
|
}
|
|
if (defined $Language_by_Script{$script_language}) {
|
|
if (defined $Filters_by_Language{
|
|
$Language_by_Script{$script_language}}) {
|
|
$language = $Language_by_Script{$script_language};
|
|
} else {
|
|
$rh_ignored->{$full_file} =
|
|
"undefined: Filters_by_Language{" .
|
|
$Language_by_Script{$script_language} .
|
|
"} for scripting language $script_language";
|
|
# returns (unknown)
|
|
}
|
|
} else {
|
|
# #456: XML files can have a variety of domain-specific file
|
|
# extensions. If the extension is unrecognized, examine
|
|
# the first line of the file to see if it is XML
|
|
if ($L =~ /<\?xml\s/) {
|
|
$language = "XML";
|
|
delete $rh_ignored->{$full_file};
|
|
} else {
|
|
$rh_ignored->{$full_file} = "language unknown (#3)";
|
|
}
|
|
# returns (unknown)
|
|
}
|
|
}
|
|
print "<- classify_file($full_file)=$language\n" if $opt_v > 2;
|
|
return $language;
|
|
} # 1}}}
|
|
sub first_line { # {{{1
|
|
# return the first $n_lines of text in the file as one string
|
|
my ($file , # in
|
|
$n_lines , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
my $line = "";
|
|
print "-> first_line($file, $n_lines)\n" if $opt_v > 2;
|
|
if (!can_read($file)) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $line;
|
|
}
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
print "<- first_line($file, $n_lines)\n" if $opt_v > 2;
|
|
return $line;
|
|
}
|
|
# issue 644: Unicode files can have non-zero $n_lines
|
|
# but empty <$IN> contents
|
|
for (my $i = 0; $i < $n_lines; $i++) {
|
|
my $L = <$IN>;
|
|
last unless defined $L;
|
|
chomp($line .= $L);
|
|
}
|
|
$IN->close;
|
|
print "<- first_line($file, $n_lines, '$line')\n" if $opt_v > 2;
|
|
return $line;
|
|
} # 1}}}
|
|
sub peek_at_first_line { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> peek_at_first_line($file)\n" if $opt_v > 2;
|
|
|
|
my $script_language = "";
|
|
my $first_line = first_line($file, 1, $rh_Err, $raa_errors);
|
|
|
|
if (defined $first_line) {
|
|
#print "peek_at_first_line of [$file] first_line=[$first_line]\n";
|
|
if ($first_line =~ /^#\!\s*(\S.*?)$/) {
|
|
#print "peek_at_first_line 1=[$1]\n";
|
|
my @pound_bang = split(' ', $1);
|
|
#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n";
|
|
if (basename($pound_bang[0]) eq "env" and
|
|
scalar @pound_bang > 1) {
|
|
$script_language = $pound_bang[1];
|
|
#print "peek_at_first_line pound_bang A $pound_bang[1]\n";
|
|
} else {
|
|
$script_language = basename $pound_bang[0];
|
|
#print "peek_at_first_line pound_bang B $script_language\n";
|
|
}
|
|
}
|
|
}
|
|
print "<- peek_at_first_line($file)\n" if $opt_v > 2;
|
|
return ($script_language, $first_line);
|
|
} # 1}}}
|
|
sub different_files { # {{{1
|
|
# See which of the given files are unique by computing each file's MD5
|
|
# sum. Return the subset of files which are unique.
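# Illustrative sketch (file names are hypothetical): if a.c and b.c have
# byte-identical contents they hash to the same MD5 sum; only one of the
# two is returned (preferring whichever classify_file() recognizes) and
# the other is recorded in %{$rh_ignored} as "duplicate of ...".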
|
|
my ($ra_files , # in
|
|
$rh_Err , # in
|
|
$raa_errors , # out
|
|
$rh_ignored , # out
|
|
) = @_;
|
|
|
|
print "-> different_files(@{$ra_files})\n" if $opt_v > 2;
|
|
my %file_hash = (); # file_hash{md5 hash} = [ file1, file2, ... ]
|
|
foreach my $F (@{$ra_files}) {
|
|
next if is_dir($F); # needed for Windows
|
|
my $IN = open_file('<', $F, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
|
|
$rh_ignored->{$F} = 'cannot read';
|
|
} else {
|
|
if ($HAVE_Digest_MD5) {
|
|
binmode $IN;
|
|
my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
|
|
#print "$F, $MD5\n";
|
|
push @{$file_hash{$MD5}}, $F;
|
|
} else {
|
|
# all files treated unique
|
|
push @{$file_hash{$F}}, $F;
|
|
}
|
|
$IN->close;
|
|
}
|
|
}
|
|
|
|
# Loop over file sets having identical MD5 sums. Within
|
|
# each set, pick the file that most resembles known source
|
|
# code.
|
|
my @unique = ();
|
|
for my $md5 (sort keys %file_hash) {
|
|
my $i_best = 0;
|
|
for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
|
|
my $F = $file_hash{$md5}[$i];
|
|
my (@nul_a, %nul_h);
|
|
my $language = classify_file($F, $rh_Err,
|
|
# don't save these errors; pointless
|
|
\@nul_a, \%nul_h);
|
|
$i_best = $i if $language ne "(unknown)";
|
|
}
|
|
# keep the best one found and identify the rest as ignored
|
|
for (my $i = 0; $i < scalar(@{$file_hash{$md5}}); $i++) {
|
|
if ($i == $i_best) {
|
|
push @unique, $file_hash{$md5}[$i_best];
|
|
} else {
|
|
$rh_ignored->{$file_hash{$md5}[$i]} = "duplicate of " .
|
|
$file_hash{$md5}[$i_best];
|
|
}
|
|
}
|
|
|
|
}
|
|
print "<- different_files(@unique)\n" if $opt_v > 2;
|
|
return @unique;
|
|
} # 1}}}
|
|
sub call_counter { # {{{1
|
|
my ($file , # in
|
|
$language , # in
|
|
$ra_Errors, # out
|
|
) = @_;
|
|
|
|
# Logic: pass the file through the following filters:
|
|
# 1. remove leading lines (if --skip-leading)
|
|
# 2. remove blank lines
|
|
# 3. remove comments using each filter defined for this language
|
|
# (example: SQL has two, remove_starts_with(--) and
|
|
# remove_c_comments() )
|
|
# 4. compute comment lines as
|
|
# total lines - blank lines - lines left over after all
|
|
# comment filters have been applied
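# Worked example of the arithmetic described above (numbers are
# illustrative): a 100-line file with 20 blank lines that is left with
# 65 lines after the comment filters yields
#     comment lines = 100 - 20 - 65 = 15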
|
|
|
|
print "-> call_counter($file, $language)\n" if $opt_v > 2;
|
|
#print "call_counter: ", Dumper(@routines), "\n";
|
|
|
|
my @lines = ();
|
|
my $ascii = "";
|
|
if (is_binary($file) and $opt_unicode) {
|
|
# was binary so must be unicode
|
|
|
|
$/ = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
my $bin_text = <$IN>;
|
|
$IN->close;
|
|
$/ = "\n";
|
|
|
|
$ascii = unicode_to_ascii( $bin_text );
|
|
@lines = split("\n", $ascii );
|
|
foreach (@lines) { $_ = "$_\n"; }
|
|
|
|
} else {
|
|
# regular text file
|
|
@lines = read_file($file);
|
|
$ascii = join('', @lines);
|
|
}
|
|
|
|
# implement --perl-ignore-data here
|
|
|
|
if ($opt_skip_leading) {
|
|
my $strip = 1;
|
|
my ($N, @exts) = split(/,/, $opt_skip_leading);
|
|
if (@exts) {
|
|
# only apply if this file's extension is listed
|
|
my $this_file_ext = file_extension($file);
|
|
$strip = grep(/^${this_file_ext}$/, @exts);
|
|
}
|
|
@lines = remove_first_n($N, \@lines) if $strip;
|
|
}
|
|
|
|
my @original_lines = @lines;
|
|
my $total_lines = scalar @lines;
|
|
|
|
print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
|
|
@lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
|
|
my $blank_lines = $total_lines - scalar @lines;
|
|
print " call_counter: total_lines=$total_lines blank_lines=",
|
|
$blank_lines, "\n" if $opt_v > 2;
|
|
print_lines($file, "Blank lines removed:", \@lines)
|
|
if $opt_print_filter_stages;
|
|
|
|
@lines = rm_comments(\@lines, $language, $file,
|
|
\%EOL_Continuation_re, $ra_Errors);
|
|
|
|
my $comment_lines = $total_lines - $blank_lines - scalar @lines;
|
|
if ($opt_strip_comments) {
|
|
my $stripped_file = "";
|
|
if ($opt_original_dir) {
|
|
$stripped_file = $file . ".$opt_strip_comments";
|
|
} else {
|
|
$stripped_file = basename $file . ".$opt_strip_comments";
|
|
}
|
|
write_file($stripped_file, {}, @lines);
|
|
}
|
|
if ($opt_strip_code) {
|
|
my $stripped_file = "";
|
|
if ($opt_original_dir) {
|
|
$stripped_file = $file . ".$opt_strip_code";
|
|
} else {
|
|
$stripped_file = basename $file . ".$opt_strip_code";
|
|
}
|
|
write_file($stripped_file, {}, rm_code(\@original_lines, \@lines));
|
|
}
|
|
if ($opt_html and !$opt_diff) {
|
|
chomp(@original_lines); # includes blank lines, comments
|
|
chomp(@lines); # no blank lines, no comments
|
|
|
|
my (@diff_L, @diff_R, %count);
|
|
|
|
# remove blank lines to get better quality diffs; count
|
|
# blank lines separately
|
|
my @original_lines_minus_white = ();
|
|
# however must keep track of how many blank lines were removed and
|
|
# where they were removed so that the HTML display can include it
|
|
my %blank_line = ();
|
|
my $insert_line = 0;
|
|
foreach (@original_lines) {
|
|
if (/^\s*$/) {
|
|
++$count{blank}{same};
|
|
++$blank_line{ $insert_line };
|
|
} else {
|
|
++$insert_line;
|
|
push @original_lines_minus_white, $_;
|
|
}
|
|
}
|
|
|
|
array_diff( $file , # in
|
|
\@original_lines_minus_white , # in
|
|
\@lines , # in
|
|
"comment" , # in
|
|
\@diff_L, \@diff_R, , # out
|
|
$ra_Errors); # in/out
|
|
write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
|
|
#print Dumper("count", \%count);
|
|
}
|
|
|
|
print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
|
|
if $opt_v > 2;
|
|
return ($total_lines, $blank_lines, $comment_lines);
|
|
} # 1}}}
|
|
sub windows_glob { # {{{1
    # Windows doesn't expand wildcards. Use code from Sean M. Burke's
    # Win32::Autoglob module to do this.
    return map {;
        ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
    } @_;
} # 1}}}
sub write_file { # {{{1
|
|
my ($file , # in
|
|
$rh_options , # in
|
|
@lines , # in
|
|
) = @_;
|
|
# If $file is a conventional scalar, it is the name of the file to write to.
|
|
# if $file is a reference to a scalar, rather than writing @lines to a file,
|
|
# write @lines to this scalar as a single string.
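# Hypothetical calling examples for the two modes described above (names
# are illustrative only):
#     write_file('report.txt', {}, @lines);  # write to a file on disk
#     write_file(\$buffer    , {}, @lines);  # accumulate into $buffer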
|
|
|
|
my $local_formatting = 0;
|
|
foreach my $opt (sort keys %{$rh_options}) {
|
|
# print "write_file option $opt = $rh_options->{$opt}\n";
|
|
$local_formatting = 1;
|
|
}
|
|
my $write_to_file = ref($file) eq "" ? 1 : 0;
|
|
|
|
#print "write_file 1 [$file]\n";
|
|
# Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
|
|
if ($write_to_file) {
|
|
my $preglob_filename = $file;
|
|
#print "write_file 2 [$preglob_filename]\n";
|
|
if ($ON_WINDOWS) {
|
|
$file = (windows_glob($file))[0];
|
|
} else {
|
|
$file = File::Glob::bsd_glob($file);
|
|
}
|
|
#print "write_file 3 [$file]\n";
|
|
$file = $preglob_filename unless $file;
|
|
#print "write_file 4 [$file]\n";
|
|
}
|
|
|
|
if ($write_to_file) {
|
|
print "-> write_file($file)\n" if $opt_v > 2;
|
|
} else {
|
|
print "-> write_file() -- writing to string variable\n" if $opt_v > 2;
|
|
}
|
|
|
|
my @prt_lines = ();
|
|
|
|
my $n_col = undef;
|
|
if ($local_formatting) {
|
|
$n_col = scalar @{$rh_options->{'columns'}};
|
|
if ($opt_xml) {
|
|
push @prt_lines, '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
|
|
push @prt_lines, "<all_$rh_options->{'file_type'}>\n";
|
|
} elsif ($opt_yaml) {
|
|
push @prt_lines, "---\n";
|
|
} elsif ($opt_md) {
|
|
push @prt_lines, join("|", @{$rh_options->{'columns'}}) . "\n";
|
|
push @prt_lines, join("|", map( ":------", 1 .. $n_col)) . "\n";
|
|
}
|
|
}
|
|
|
|
chomp(@lines);
|
|
|
|
if ($local_formatting) {
|
|
my @json_lines = ();
|
|
foreach my $L (@lines) {
|
|
my @entries;
|
|
if ($rh_options->{'separator'}) {
|
|
@entries = split($rh_options->{'separator'}, $L, $n_col);
|
|
} else {
|
|
@entries = ( $L );
|
|
}
|
|
if ($opt_xml) {
|
|
push @prt_lines, " <$rh_options->{'file_type'} ";
|
|
for (my $i = 0; $i < $n_col; $i++) {
|
|
push @prt_lines, sprintf("%s=\"%s\" ", $rh_options->{'columns'}[$i], $entries[$i]);
|
|
}
|
|
push @prt_lines, "/>\n";
|
|
} elsif ($opt_yaml or $opt_json) {
|
|
my @pairs = ();
|
|
for (my $i = 0; $i < $n_col; $i++) {
|
|
push @pairs,
|
|
sprintf "\"%s\":\"%s\"", $rh_options->{'columns'}[$i], $entries[$i];
|
|
}
|
|
if ($opt_json) {
|
|
# JSON can't have a literal '\x' in filenames, #575
|
|
$pairs[0] =~ s/\\x//g;
|
|
push @json_lines, join(", ", @pairs );
|
|
} else {
|
|
push @prt_lines, "- {", join(", ", @pairs) . "}\n";
|
|
}
|
|
} elsif ($opt_csv) {
|
|
push @prt_lines, join(",", @entries) . "\n";
|
|
} elsif ($opt_md) {
|
|
push @prt_lines, join("|", @entries) . "\n";
|
|
}
|
|
}
|
|
if ($opt_json) {
|
|
push @prt_lines, "[{" . join("},\n {", @json_lines) . "}]\n";
|
|
}
|
|
if (!$opt_json and !$opt_yaml and !$opt_xml and !$opt_csv) {
|
|
push @prt_lines, join("\n", @lines) . "\n";
|
|
}
|
|
} else {
|
|
push @prt_lines, join("\n", @lines) . "\n";
|
|
}
|
|
|
|
if ($local_formatting and $opt_xml) {
|
|
push @prt_lines, "</all_$rh_options->{'file_type'}>\n";
|
|
}
|
|
|
|
# Create the destination directory if it doesn't already exist.
|
|
my $abs_file_path = File::Spec->rel2abs( $file );
|
|
my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
|
|
mkpath($volume . $directories, 1, 0777);
|
|
|
|
my $OUT = undef;
|
|
unlink_file($file);
|
|
if ($opt_file_encoding) {
|
|
$OUT = open_file(">:encoding($opt_file_encoding)", $file, 0);
|
|
} else {
|
|
$OUT = open_file('>', $file, 1);
|
|
}
|
|
if (!defined $OUT) {
|
|
warn "Unable to write to $file\n";
|
|
print "<- write_file\n" if $opt_v > 2;
|
|
return;
|
|
}
|
|
print $OUT @prt_lines;
|
|
$OUT->close;
|
|
|
|
if (can_read($file)) {
|
|
print "Wrote $file" unless $opt_quiet or $opt_fmt > 0;
|
|
print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
|
|
print "\n" unless $opt_quiet;
|
|
}
|
|
|
|
print "<- write_file\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub file_pairs_from_file { # {{{1
|
|
my ($file , # in
|
|
$ra_added , # out
|
|
$ra_removed , # out
|
|
$ra_compare_list , # out
|
|
) = @_;
|
|
#
|
|
# Example valid input format for $file
|
|
# 1)
|
|
# A/d1/hello.f90 | B/d1/hello.f90
|
|
# A/hello.C | B/hello.C
|
|
# A/d2/hi.py | B/d2/hi.py
|
|
#
|
|
# 2)
|
|
# Files added: 1
|
|
# + B/extra_file.pl ; Perl
|
|
#
|
|
# Files removed: 1
|
|
# - A/d2/hello.java ; Java
|
|
#
|
|
# File pairs compared: 3
|
|
# != A/d1/hello.f90 | B/d1/hello.f90 ; Fortran 90
|
|
# != A/hello.C | B/hello.C ; C++
|
|
# == A/d2/hi.py | B/d2/hi.py ; Python
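# For sample input 2) above, the parsed results would be (illustrative):
#     @{$ra_added}        = ( 'B/extra_file.pl' );
#     @{$ra_removed}      = ( 'A/d2/hello.java' );
#     @{$ra_compare_list} = ( ['A/d1/hello.f90', 'B/d1/hello.f90'],
#                             ['A/hello.C'     , 'B/hello.C'     ],
#                             ['A/d2/hi.py'    , 'B/d2/hi.py'    ] );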
|
|
|
|
print "-> file_pairs_from_file($file)\n" if $opt_v and $opt_v > 2;
|
|
@{$ra_compare_list} = ();
|
|
my @lines = read_file($file);
|
|
my $mode = "compare";
|
|
foreach my $L (@lines) {
|
|
next if $L =~ /^\s*$/ or $L =~ /^\s*#/;
|
|
chomp($L);
|
|
if ($L =~ /^Files\s+(added|removed):/) {
|
|
$mode = $1;
|
|
} elsif ($L =~ /^File\s+pairs\s+compared:/) {
|
|
$mode = "compare";
|
|
} elsif ($mode eq "added" or $mode eq "removed") {
|
|
$L =~ m/^\s*[+-]\s+(.*?)\s+;/;
|
|
my $F = $1;
|
|
if (!defined $1) {
|
|
warn "file_pairs_from_file($file) parse failure\n",
|
|
"in $mode mode for '$L', ignoring\n";
|
|
next;
|
|
}
|
|
if ($mode eq "added") {
|
|
push @{$ra_added} , $F;
|
|
} else {
|
|
push @{$ra_removed}, $F;
|
|
}
|
|
} else {
|
|
$L =~ m/^\s*([!=]=\s*)?(.*?)\s*\|\s*(.*?)\s*(;.*?)?$/;
|
|
if (!defined $2 or !defined $3) {
|
|
warn "file_pairs_from_file($file) parse failure\n",
|
|
"in compare mode for '$L', ignoring\n";
|
|
next;
|
|
}
|
|
push @{$ra_compare_list}, ( [$2, $3] );
|
|
}
|
|
}
|
|
print "<- file_pairs_from_file\n" if $opt_v and $opt_v > 2;
|
|
}
|
|
sub read_file { # {{{1
|
|
my ($file, ) = @_;
|
|
print "-> read_file($file)\n" if $opt_v and $opt_v > 2;
|
|
my %BoM = (
|
|
"fe ff" => 2 ,
|
|
"ff fe" => 2 ,
|
|
"ef bb bf" => 3 ,
|
|
"f7 64 4c" => 3 ,
|
|
"0e fe ff" => 3 ,
|
|
"fb ee 28" => 3 ,
|
|
"00 00 fe ff" => 4 ,
|
|
"ff fe 00 00" => 4 ,
|
|
"2b 2f 76 38" => 4 ,
|
|
"2b 2f 76 39" => 4 ,
|
|
"2b 2f 76 2b" => 4 ,
|
|
"2b 2f 76 2f" => 4 ,
|
|
"dd 73 66 73" => 4 ,
|
|
"84 31 95 33" => 4 ,
|
|
"2b 2f 76 38 2d" => 5 ,
|
|
);
|
|
|
|
my @lines = ();
|
|
my $IN = open_file('<', $file, 1);
|
|
if (defined $IN) {
|
|
@lines = <$IN>;
|
|
$IN->close;
|
|
if ($lines[$#lines]) { # test necessary for zero content files
|
|
# (superfluous?)
|
|
# Some files don't end with a new line. Force this:
|
|
$lines[$#lines] .= "\n" unless $lines[$#lines] =~ m/\n$/;
|
|
}
|
|
} else {
|
|
warn "Unable to read $file\n";
|
|
}
|
|
|
|
# Are first few characters of the file Unicode Byte Order
|
|
# Marks (http://en.wikipedia.org/wiki/Byte_Order_Mark)?
|
|
# If yes, remove them.
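# Example (added for illustration): a UTF-8 file whose first bytes are
# "ef bb bf" matches the three-byte entry in %BoM above, so those three
# characters are stripped from the front of $lines[0] before counting.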
|
|
if (@lines) {
|
|
my @chrs = split('', $lines[0]);
|
|
my $n_chrs = scalar @chrs;
|
|
my ($n2, $n3, $n4, $n5) = ('', '', '', '');
|
|
$n2 = sprintf("%x %x", map ord, @chrs[0,1]) if $n_chrs >= 2;
|
|
$n3 = sprintf("%s %x", $n2, ord $chrs[2]) if $n_chrs >= 3;
|
|
$n4 = sprintf("%s %x", $n3, ord $chrs[3]) if $n_chrs >= 4;
|
|
$n5 = sprintf("%s %x", $n4, ord $chrs[4]) if $n_chrs >= 5;
|
|
if (defined $BoM{$n2}) { $lines[0] = substr $lines[0], 2;
|
|
} elsif (defined $BoM{$n3}) { $lines[0] = substr $lines[0], 3;
|
|
} elsif (defined $BoM{$n4}) { $lines[0] = substr $lines[0], 4;
|
|
} elsif (defined $BoM{$n5}) { $lines[0] = substr $lines[0], 5;
|
|
}
|
|
}
|
|
|
|
# Trim DOS line endings. This allows Windows files
|
|
# to be diff'ed with Unix files without line endings
|
|
# causing every line to differ.
|
|
foreach (@lines) { s/\cM$// }
|
|
|
|
print "<- read_file\n" if $opt_v and $opt_v > 2;
|
|
return @lines;
|
|
} # 1}}}
|
|
sub rm_blanks { # {{{1
|
|
my ($ra_in ,
|
|
$language ,
|
|
$rh_EOL_continuation_re) = @_;
|
|
print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
|
|
#print "rm_blanks: language = [$language]\n";
|
|
my @out = ();
|
|
if ($language eq "COBOL") {
|
|
@out = remove_cobol_blanks($ra_in);
|
|
} else {
|
|
# removes blank lines
|
|
if (defined $rh_EOL_continuation_re->{$language}) {
|
|
@out = remove_matches_2re($ra_in, blank_regex($language),
|
|
$rh_EOL_continuation_re->{$language});
|
|
} else {
|
|
@out = remove_matches($ra_in, blank_regex($language));
|
|
}
|
|
}
|
|
|
|
print "<- rm_blanks(language=$language, n_remain= ",
|
|
scalar(@out), "\n" if $opt_v > 2;
|
|
return @out;
|
|
} # 1}}}
|
|
sub blank_regex { # {{{1
    my ($language) = @_;

    print "-> blank_regex(language=$language)\n" if $opt_v > 2;

    my $blank_regex = '^\s*$';
    if ($language eq "X++") {
        $blank_regex = '^\s*#?\s*$';
    }

    print "<- blank_regex(language=$language) = \"", $blank_regex, "\"\n" if $opt_v > 2;
    return $blank_regex;
} # 1}}}
sub rm_comments { # {{{1
|
|
my ($ra_lines , # in, must be free of blank lines
|
|
$language , # in
|
|
$file , # in (some language counters, eg Haskell, need
|
|
# access to the original file)
|
|
$rh_EOL_continuation_re , # in
|
|
$raa_Errors , # out
|
|
) = @_;
|
|
print "-> rm_comments(file=$file)\n" if $opt_v > 2;
|
|
my @routines = @{$Filters_by_Language{$language}};
|
|
my @lines = @{$ra_lines};
|
|
my @original_lines = @{$ra_lines};
|
|
|
|
if (!scalar @original_lines) {
|
|
return @lines;
|
|
}
|
|
|
|
foreach my $call_string (@routines) {
|
|
my $subroutine = $call_string->[0];
|
|
next if $subroutine eq "rm_comments_in_strings" and !$opt_strip_str_comments;
|
|
if (! defined &{$subroutine}) {
|
|
warn "rm_comments undefined subroutine $subroutine for $file\n";
|
|
next;
|
|
}
|
|
print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
|
|
my @args = @{$call_string};
|
|
shift @args; # drop the subroutine name
|
|
if (@args and $args[0] eq '>filename<') {
|
|
shift @args;
|
|
unshift @args, $file;
|
|
}
|
|
|
|
# Unusual inputs, namely /* within strings without
|
|
# a corresponding */ can cause huge delays so put a timer on this.
|
|
my $max_duration_sec = scalar(@lines)/1000.0; # est lines per second
|
|
$max_duration_sec = 1.0 if $max_duration_sec < 1;
|
|
if (defined $opt_timeout) {
|
|
$max_duration_sec = $opt_timeout if $opt_timeout > 0;
|
|
}
|
|
#my $T_start = Time::HiRes::time();
|
|
eval {
|
|
local $SIG{ALRM} = sub { die "alarm\n" };
|
|
alarm $max_duration_sec;
|
|
no strict 'refs';
|
|
@lines = &{$subroutine}(\@lines, @args); # apply filter...
|
|
alarm 0;
|
|
};
|
|
if ($@) {
|
|
# timed out
|
|
die unless $@ eq "alarm\n";
|
|
push @{$raa_Errors},
|
|
[ $Error_Codes{'Line count, exceeded timeout'}, $file ];
|
|
@lines = ();
|
|
if ($opt_v) {
|
|
warn "rm_comments($subroutine): exceeded timeout for $file--ignoring\n";
|
|
}
|
|
next;
|
|
}
|
|
#print "end time = ",Time::HiRes::time() - $T_start;
|
|
|
|
print " rm_comments after $subroutine line count=",
|
|
scalar(@lines), "\n" if $opt_v > 2;
|
|
|
|
#print "lines after=\n";
|
|
#print Dumper(\@lines);
|
|
|
|
print_lines($file, "After $subroutine(@args)", \@lines)
|
|
if $opt_print_filter_stages;
|
|
# then remove blank lines which are created by comment removal
|
|
if (defined $rh_EOL_continuation_re->{$language}) {
|
|
@lines = remove_matches_2re(\@lines, '^\s*$',
|
|
$rh_EOL_continuation_re->{$language});
|
|
} else {
|
|
@lines = remove_matches(\@lines, '^\s*$');
|
|
}
|
|
|
|
print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines)
|
|
if $opt_print_filter_stages;
|
|
}
|
|
|
|
foreach (@lines) { chomp } # make sure no spurious newlines were added
|
|
|
|
# Exception for scripting languages: treat the first #! line as code.
|
|
# Will need to add it back in if it was removed earlier.
|
|
chomp( $original_lines[0] );
|
|
if (defined $Script_Language{$language} and
|
|
$original_lines[0] =~ /^#!/ and
|
|
(!scalar(@lines) or ($lines[0] ne $original_lines[0]))) {
|
|
unshift @lines, $original_lines[0]; # add the first line back
|
|
}
|
|
|
|
print "<- rm_comments\n" if $opt_v > 2;
|
|
return @lines;
|
|
} # 1}}}
|
|
sub rm_code { # {{{1
|
|
# Return lines containing only comments.
|
|
my ($ra_lines_w_comments , # in
|
|
$ra_lines_no_comments, # in
|
|
) = @_;
|
|
|
|
my @w_comments_no_blanks = grep { ! /^\s*$/ } @{$ra_lines_w_comments} ;
|
|
my @no_comments_no_blanks = grep { ! /^\s*$/ } @{$ra_lines_no_comments};
|
|
chomp( @w_comments_no_blanks );
|
|
chomp( @no_comments_no_blanks );
|
|
|
|
my @sdiffs = sdiff( \@w_comments_no_blanks, \@no_comments_no_blanks, );
|
|
my @comments = ();
|
|
foreach my $entry (@sdiffs) {
|
|
my ($out_1, $out_2) = ('', '');
|
|
next if $entry->[0] eq 'u';
|
|
if ($entry->[0] eq '-') {
|
|
$out_1 = $entry->[1];
|
|
}
|
|
next if $out_1 =~ /^\s*$/;
|
|
push @comments, $out_1;
|
|
}
|
|
return @comments;
|
|
} # 1}}}
|
|
sub remove_first_n { # {{{1
    my ($n, $ra_lines, ) = @_;
    print "-> remove_first_n\n" if $opt_v > 2;

    my @save_lines = ();
    if (scalar @{$ra_lines} > $n) {
        for (my $i = $n; $i < scalar @{$ra_lines}; $i++) {
            push @save_lines, $ra_lines->[$i];
        }
    }

    print "<- remove_first_n\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_f77_comments { # {{{1
    my ($ra_lines, ) = @_;
    print "-> remove_f77_comments\n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {
        next if m{^[*cC]};
        next if m{^\s*!};
        push @save_lines, $_;
    }

    print "<- remove_f77_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_f90_comments { # {{{1
    # derived from SLOCCount
    my ($ra_lines, ) = @_;
    print "-> remove_f90_comments\n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {
        # a comment is m/^\s*!/
        # an empty line is m/^\s*$/
        # a HPF statement is m/^\s*!hpf\$/i
        # an Open MP statement is m/^\s*!omp\$/i
        if (! m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
            push @save_lines, $_;
        }
    }

    print "<- remove_f90_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub reduce_to_rmd_code_blocks { #{{{1
|
|
my ($ra_lines) = @_; #in
|
|
print "-> reduce_to_rmd_code_blocks()\n" if $opt_v > 2;
|
|
|
|
my $in_code_block = 0;
|
|
my @save_lines = ();
|
|
foreach (@{$ra_lines}) {
|
|
if ( m/^```\{\s*[[:alpha:]]/ ) {
|
|
$in_code_block = 1;
|
|
next;
|
|
}
|
|
if ( m/^```\s*$/ ) {
|
|
$in_code_block = 0;
|
|
}
|
|
next if (!$in_code_block);
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- reduce_to_rmd_code_blocks()\n" if $opt_v> 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_matches { # {{{1
|
|
my ($ra_lines, # in
|
|
$pattern , # in Perl regular expression (case insensitive)
|
|
) = @_;
|
|
print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
|
|
my @save_lines = ();
|
|
foreach (@{$ra_lines}) {
|
|
#chomp;
|
|
#print "remove_matches [$pattern] [$_]\n";
|
|
next if m{$pattern}i;
|
|
# s{$pattern}{}i;
|
|
# next unless /\S/; # at least one non space
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
print "<- remove_matches\n" if $opt_v > 2;
|
|
#print "remove_matches returning\n ", join("\n ", @save_lines), "\n";
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_matches_2re { # {{{1
|
|
my ($ra_lines, # in
|
|
$pattern1, # in Perl regex 1 (case insensitive) to match
|
|
$pattern2, # in Perl regex 2 (case insensitive) to not match prev line
|
|
) = @_;
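# Illustrative use (patterns are hypothetical): with $pattern1 = '^\s*$'
# and $pattern2 set to a language's end-of-line continuation regex
# (e.g. a trailing backslash), a blank line is removed unless the
# previous line ended in a continuation, in which case it is kept.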
|
|
print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
|
|
my @save_lines = ();
|
|
for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
|
|
# chomp($ra_lines->[$i]);
|
|
#print "remove_matches_2re [$pattern1] [$pattern2] [$ra_lines->[$i]]\n";
|
|
if ($i) {
|
|
#print "remove_matches_2re prev=[$ra_lines->[$i-1]] this=[$ra_lines->[$i]]\n";
|
|
next if ($ra_lines->[$i] =~ m{$pattern1}i) and
|
|
($ra_lines->[$i-1] !~ m{$pattern2}i);
|
|
} else {
|
|
# on first line
|
|
next if $ra_lines->[$i] =~ m{$pattern1}i;
|
|
}
|
|
push @save_lines, $ra_lines->[$i];
|
|
}
|
|
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
print "<- remove_matches_2re\n" if $opt_v > 2;
|
|
#print "remove_matches_2re returning\n ", join("\n ", @save_lines), "\n";
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_inline { # {{{1
|
|
my ($ra_lines, # in
|
|
$pattern , # in Perl regular expression (case insensitive)
|
|
) = @_;
|
|
print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
unless ($opt_inline) {
|
|
return @{$ra_lines};
|
|
}
|
|
my $nLines_affected = 0;
|
|
foreach (@{$ra_lines}) {
|
|
#chomp; print "remove_inline [$pattern] [$_]\n";
|
|
if (m{$pattern}i) {
|
|
++$nLines_affected;
|
|
s{$pattern}{}i;
|
|
}
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_inline\n" if $opt_v > 2;
|
|
#print "remove_inline returning\n ", join("\n ", @save_lines), "\n";
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_above { # {{{1
|
|
my ($ra_lines, $marker, ) = @_;
|
|
print "-> remove_above(marker=$marker)\n" if $opt_v > 2;
|
|
|
|
# Make two passes through the code:
|
|
# 1. check if the marker exists
|
|
# 2. remove anything above the marker if it exists,
|
|
# do nothing if the marker does not exist
|
|
|
|
# Pass 1
|
|
my $found_marker = 0;
|
|
for (my $line_number = 1;
|
|
$line_number <= scalar @{$ra_lines};
|
|
$line_number++) {
|
|
if ($ra_lines->[$line_number-1] =~ m{$marker}) {
|
|
$found_marker = $line_number;
|
|
last;
|
|
}
|
|
}
|
|
|
|
# Pass 2 only if needed
|
|
my @save_lines = ();
|
|
if ($found_marker) {
|
|
my $n = 1;
|
|
foreach (@{$ra_lines}) {
|
|
push @save_lines, $_
|
|
if $n >= $found_marker;
|
|
++$n;
|
|
}
|
|
} else { # marker wasn't found; save all lines
|
|
foreach (@{$ra_lines}) {
|
|
push @save_lines, $_;
|
|
}
|
|
}
|
|
|
|
print "<- remove_above\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_below { # {{{1
|
|
my ($ra_lines, $marker, ) = @_;
|
|
print "-> remove_below(marker=$marker)\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
foreach (@{$ra_lines}) {
|
|
last if m{$marker};
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_below\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_below_above { # {{{1
|
|
my ($ra_lines, $marker_below, $marker_above, ) = @_;
|
|
# delete lines delimited by start and end line markers such
|
|
# as Perl POD documentation
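# Illustrative use (markers are hypothetical): called with markers such
# as '^=head' and '^=cut', everything from a POD directive line down to
# the closing "=cut" line is dropped.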
|
|
print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n"
|
|
if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $between = 0;
|
|
foreach (@{$ra_lines}) {
|
|
if (!$between and m{$marker_below}) {
|
|
$between = 1;
|
|
next;
|
|
}
|
|
if ($between and m{$marker_above}) {
|
|
$between = 0;
|
|
next;
|
|
}
|
|
next if $between;
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_below_above\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_between { # {{{1
|
|
my ($ra_lines, $marker, ) = @_;
|
|
# $marker must contain one of the balanced pairs understood
|
|
# by Regexp::Common::balanced, namely
|
|
# '{}' '()' '[]' or '<>'
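# Usage sketch (marker choice is illustrative): remove_between(\@lines, '{}')
# joins the lines, uses Regexp::Common's $RE{balanced} pattern to delete
# a balanced {...} group, then re-splits the result into lines.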
|
|
|
|
print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
my %acceptable = ('{}'=>1, '()'=>1, '[]'=>1, '<>'=>1, );
|
|
die "remove_between: invalid delimiter '$marker'\n",
|
|
"the delimiter must be one of these four pairs:\n",
|
|
"{} () [] <>\n" unless
|
|
$acceptable{$marker};
|
|
|
|
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
|
my $all_lines = join("", @{$ra_lines});
|
|
|
|
no strict 'vars';
|
|
# otherwise get:
|
|
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
|
|
no warnings;
|
|
$all_lines =~ s/$1//g;
|
|
}
|
|
|
|
print "[", join("]\n[", split(/\n/, $all_lines)), "]\n" if $opt_v > 4;
|
|
print "<- remove_between\n" if $opt_v > 2;
|
|
return split("\n", $all_lines);
|
|
} # 1}}}
|
|
sub rm_comments_in_strings { # {{{1
|
|
my ($ra_lines, $string_marker, $start_comment, $end_comment, $multiline_mode) = @_;
|
|
$multiline_mode = 0 if not defined $multiline_mode;
|
|
# Replace comments within strings with 'xx'.
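# Example of the transformation (illustrative input): with
#     $string_marker = '"', $start_comment = '/*', $end_comment = '*/'
# the line
#     x = "see /* this */";
# becomes
#     x = "see xx this xx";
# so text inside strings can no longer be mistaken for real comments.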
|
|
|
|
print "-> rm_comments_in_strings(string_marker=$string_marker, " .
|
|
"start_comment=$start_comment, end_comment=$end_comment)\n"
|
|
if $opt_v > 2;
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
|
|
my @save_lines = ();
|
|
my $in_ml_string = 0;
|
|
foreach my $line (@{$ra_lines}) {
|
|
#print "line=[$line]\n";
|
|
my $new_line = "";
|
|
|
|
if ($line !~ /${string_marker}/) {
|
|
# short circuit; no strings on this line
|
|
if ( $in_ml_string ) {
|
|
$line =~ s/\Q${start_comment}\E/xx/g;
|
|
$line =~ s/\Q${end_comment}\E/xx/g if $end_comment;
|
|
}
|
|
push @save_lines, $line;
|
|
next;
|
|
}
|
|
|
|
# replace backslashed string markers with 'Q'
|
|
$line =~ s/\\${string_marker}/Q/g;
|
|
|
|
if ( $in_ml_string and $line =~ /^(.*?)(${string_marker})(.*)$/ ) {
|
|
# A multiline string ends on this line. Process the part
|
|
# until the end of the multiline string first.
|
|
my ($lastpart_ml_string, $firstpart_marker, $rest_of_line ) = ($1, $2, $3);
|
|
$lastpart_ml_string =~ s/\Q${start_comment}\E/xx/g;
|
|
$lastpart_ml_string =~ s/\Q${end_comment}\E/xx/g if $end_comment;
|
|
$new_line = $lastpart_ml_string . $firstpart_marker;
|
|
$line = $rest_of_line;
|
|
$in_ml_string = 0;
|
|
}
|
|
|
|
my @tokens = split(/(${string_marker}.*?${string_marker})/, $line);
|
|
foreach my $t (@tokens) {
|
|
#printf " t0 = [$t]\n";
|
|
if ($t =~ /${string_marker}.*${string_marker}$/) {
|
|
# enclosed in quotes; process this token
|
|
$t =~ s/\Q${start_comment}\E/xx/g;
|
|
$t =~ s/\Q${end_comment}\E/xx/g if $end_comment;
|
|
}
|
|
elsif ( $multiline_mode and $t =~ /(${string_marker})/ ) {
|
|
# Unclosed quote present in line. If multiline_mode is enabled,
|
|
# consider it the start of a multiline string.
|
|
my $firstpart_marker = $1;
|
|
my @sub_token = split(/${string_marker}/, $t );
|
|
|
|
if ( scalar @sub_token == 1 ) {
|
|
# The line ends with a string marker that starts
|
|
# a multiline string.
|
|
$t = $sub_token[0] . $firstpart_marker;
|
|
$in_ml_string = 1;
|
|
}
|
|
elsif ( scalar @sub_token == 2 ) {
|
|
# The line has some more content after the string
|
|
# marker that starts a multiline string
|
|
$t = $sub_token[0] . $firstpart_marker;
|
|
$sub_token[1] =~ s/\Q${start_comment}\E/xx/g;
|
|
$sub_token[1] =~ s/\Q${end_comment}\E/xx/g if $end_comment;
|
|
$t .= $sub_token[1];
|
|
$in_ml_string = 1;
|
|
} else {
|
|
print "Warning: rm_comments_in_string length \@sub_token > 2\n";
|
|
}
|
|
|
|
}
|
|
#printf " t1 = [$t]\n";
|
|
$new_line .= $t;
|
|
}
|
|
push @save_lines, $new_line;
|
|
}
|
|
|
|
print "[", join("]\n[", @save_lines), "]\n" if $opt_v > 4;
|
|
print "<- rm_comments_in_strings\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_between_general { # {{{1
|
|
my ($ra_lines, $start_marker, $end_marker, ) = @_;
|
|
# Start and end markers may be any length strings.
|
|
|
|
print "-> remove_between_general(start=$start_marker, end=$end_marker)\n"
|
|
if $opt_v > 2;
|
|
|
|
my $all_lines = join("", @{$ra_lines});
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/\Q$start_marker\E.*?\Q$end_marker\E//g; # strip one-line comments
|
|
next if /^\s*$/;
|
|
if ($in_comment) {
|
|
if (/\Q$end_marker\E/) {
|
|
s/^.*?\Q$end_marker\E//;
|
|
$in_comment = 0;
|
|
}
|
|
next if $in_comment;
|
|
}
|
|
next if /^\s*$/;
|
|
$in_comment = 1 if /^(.*?)\Q$start_marker\E/; # $1 may be blank or code
|
|
next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
|
|
if ($in_comment) {
|
|
# part code, part comment; strip the comment and keep the code
|
|
s/^(.*?)\Q$start_marker\E.*$/$1/;
|
|
}
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_between_general\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_between_regex { # {{{1
|
|
my ($ra_lines, $start_RE, $end_RE, ) = @_;
|
|
# Start and end regex's may be any length strings.
|
|
|
|
print "-> remove_between_regex(start=$start_RE, end=$end_RE)\n"
|
|
if $opt_v > 2;
|
|
|
|
my $all_lines = join("", @{$ra_lines});
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/${start_RE}.*?${end_RE}//g; # strip one-line comments
|
|
next if /^\s*$/;
|
|
if ($in_comment) {
|
|
if (/$end_RE/) {
|
|
s/^.*?${end_RE}//;
|
|
$in_comment = 0;
|
|
}
|
|
next if $in_comment;
|
|
}
|
|
next if /^\s*$/;
|
|
$in_comment = 1 if /^(.*?)${start_RE}/; # $1 may be blank or code
|
|
next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
|
|
if ($in_comment) {
|
|
# part code, part comment; strip the comment and keep the code
|
|
s/^(.*?)${start_RE}.*$/$1/;
|
|
}
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_between_regex\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub replace_regex { # {{{1
|
|
my ($ra_lines, $regex, $replace, ) = @_;
|
|
|
|
print "-> replace_regex(regex=$regex, replace=$replace)\n"
|
|
if $opt_v > 2;
|
|
|
|
my $all_lines = join("", @{$ra_lines});
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/${regex}/${replace}/g;
|
|
next if /^\s*$/;
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- replace_regex\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub replace_between_regex { # {{{1
|
|
my ($ra_lines, $start_RE, $end_RE, $replace_RE, $multiline_mode ) = @_;
|
|
# If multiline_mode is enabled, $replace_RE should not refer
|
|
# to any captured groups in $start_RE.
|
|
$multiline_mode = 1 if not defined $multiline_mode;
|
|
# Start and end regex's may be any length strings.
|
|
|
|
print "-> replace_between_regex(start=$start_RE, end=$end_RE, replace=$replace_RE)\n"
|
|
if $opt_v > 2;
|
|
|
|
my $all_lines = join("", @{$ra_lines});
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/${start_RE}.*?${end_RE}/${replace_RE}/eeg; # strip one-line comments
|
|
next if /^\s*$/;
|
|
if ($in_comment) {
|
|
if (/$end_RE/) {
|
|
s/^.*?${end_RE}/${replace_RE}/ee;
|
|
$in_comment = 0;
|
|
}
|
|
next if $in_comment;
|
|
}
|
|
next if /^\s*$/;
|
|
$in_comment = 1 if $multiline_mode and /^(.*?)${start_RE}/ ; # $1 may be blank or code
|
|
next if defined $1 and $1 =~ /^\s*$/; # leading blank; all comment
|
|
if ($in_comment) {
|
|
# part code, part comment; strip the comment and keep the code
|
|
s/^(.*?)${start_RE}.*$/$1/;
|
|
}
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
print "<- replace_between_regex\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_cobol_blanks { # {{{1
|
|
# subroutines derived from SLOCCount
|
|
my ($ra_lines, ) = @_;
|
|
|
|
my $free_format = 0; # Support "free format" source code.
|
|
my @save_lines = ();
|
|
|
|
foreach (@{$ra_lines}) {
|
|
next if m/^\s*$/;
|
|
my $line = expand($_); # convert tabs to equivalent spaces
|
|
$free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
|
|
if ($free_format) {
|
|
push @save_lines, $_;
|
|
} else {
|
|
# Greg Toth:
|
|
# (1) Treat lines with any alphanum in cols 1-6 and
|
|
# blanks in cols 7 through 71 as blank line, and
|
|
# (2) Treat lines with any alphanum in cols 1-6 and
|
|
# slash (/) in col 7 as blank line (this is a
|
|
# page eject directive).
|
|
push @save_lines, $_ unless m/^\d{6}\s*$/ or
|
|
($line =~ m/^.{6}\s{66}/) or
|
|
($line =~ m/^......\//);
|
|
}
|
|
}
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_cobol_comments { # {{{1
|
|
# subroutines derived from SLOCCount
|
|
my ($ra_lines, ) = @_;
|
|
|
|
my $free_format = 0; # Support "free format" source code.
|
|
my @save_lines = ();
|
|
|
|
foreach (@{$ra_lines}) {
|
|
if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) {$free_format = 1;}
|
|
if ($free_format) {
|
|
push @save_lines, $_ unless m{^\s*\*};
|
|
} else {
|
|
push @save_lines, $_ unless m{^......\*} or m{^\*};
|
|
}
|
|
}
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_jcl_comments { # {{{1
    my ($ra_lines, ) = @_;

    print "-> remove_jcl_comments\n" if $opt_v > 2;

    my @save_lines = ();
    my $in_comment = 0;
    foreach (@{$ra_lines}) {
        next if /^\s*$/;
        next if m{^//\*};
        last if m{^\s*//\s*$};
        push @save_lines, $_;
    }

    print "<- remove_jcl_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_jsp_comments { # {{{1
|
|
# JSP comment is <%-- body of comment --%>
|
|
my ($ra_lines, ) = @_;
|
|
|
|
print "-> remove_jsp_comments\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/<\%\-\-.*?\-\-\%>//g; # strip one-line comments
|
|
next if /^\s*$/;
|
|
if ($in_comment) {
|
|
if (/\-\-\%>/) {
|
|
s/^.*?\-\-\%>//;
|
|
$in_comment = 0;
|
|
}
|
|
}
|
|
next if /^\s*$/;
|
|
$in_comment = 1 if /^(.*?)<\%\-\-/;
|
|
next if defined $1 and $1 =~ /^\s*$/;
|
|
next if ($in_comment);
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_jsp_comments\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_html_comments { # {{{1
|
|
# HTML comment is <!-- body of comment -->
|
|
# Need to use my own routine until the HTML comment regex in
|
|
# the Regexp::Common module can handle <!-- -- -->
|
|
my ($ra_lines, ) = @_;
|
|
|
|
print "-> remove_html_comments\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
s/<!\-\-.*?\-\->//g; # strip one-line comments
|
|
next if /^\s*$/;
|
|
if ($in_comment) {
|
|
if (/\-\->/) {
|
|
s/^.*?\-\->//;
|
|
$in_comment = 0;
|
|
}
|
|
}
|
|
next if /^\s*$/;
|
|
$in_comment = 1 if /^(.*?)<!\-\-/;
|
|
if (defined $1 and $1 !~ /^\s*$/) {
|
|
# has both code and comment
|
|
push @save_lines, "$1\n";
|
|
next;
|
|
}
|
|
next if ($in_comment);
|
|
push @save_lines, $_;
|
|
}
|
|
|
|
print "<- remove_html_comments\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_bf_comments { # {{{1
    my ($ra_lines, ) = @_;

    print "-> remove_bf_comments\n" if $opt_v > 2;

    my @save_lines = ();
    my $in_comment = 0;
    foreach (@{$ra_lines}) {

        s/[^<>+-.,\[\]]+//g;
        next if /^\s*$/;
        push @save_lines, $_;
    }

    print "<- remove_bf_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub really_is_bf { # {{{1
|
|
my ($file, ) = @_;
|
|
|
|
print "-> really_is_bf\n" if $opt_v > 2;
|
|
my $n_bf_indicators = 0;
|
|
my @lines = read_file($file);
|
|
foreach my $L (@lines) {
|
|
my $ind = 0;
|
|
if ($L =~ /([+-]{4,} | # at least four +'s or -'s in a row
|
|
[\[\]]{4,} | # at least four [ or ] in a row
|
|
[<>][+-] | # >- or >+ or <+ or <-
|
|
<{3,} | # at least three < in a row
|
|
^\s*[\[\]]\s*$)/x) { # [ or ] on line by itself
|
|
++$n_bf_indicators;
|
|
$ind = 1;
|
|
}
|
|
# if ($ind) { print "YES: $L"; } else { print "NO : $L"; }
|
|
}
|
|
my $ratio = scalar(@lines) > 0 ? $n_bf_indicators / scalar(@lines) : 0;
|
|
my $decision = ($ratio > 0.5) || ($n_bf_indicators > 5);
|
|
printf "<- really_is_bf(Y/N=%d %s, R=%.3f, N=%d)\n",
|
|
$decision, $file, $ratio, $n_bf_indicators if $opt_v > 2;
|
|
return $decision;
|
|
} # 1}}}
|
|
sub remove_indented_block { # {{{1
|
|
# Haml block comments are defined by a silent comment marker like
|
|
# /
|
|
# or
|
|
# -#
|
|
# followed by indented text on subsequent lines.
|
|
# http://haml.info/docs/yardoc/file.REFERENCE.html#comments
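# Illustrative Haml fragment (not from the original comments):
#     -#
#       these indented lines
#       are dropped
#     %p kept
# The marker line and the more deeply indented lines after it are
# removed; "%p kept" survives because its indent falls back to the
# marker's level.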
|
|
my ($ra_lines, $regex, ) = @_;
|
|
|
|
print "-> remove_indented_block\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
foreach (@{$ra_lines}) {
|
|
|
|
next if /^\s*$/;
|
|
my $line = expand($_); # convert tabs to equivalent spaces
|
|
if ($in_comment) {
|
|
$line =~ /^(\s*)/;
|
|
# print "indent=", length $1, "\n";
|
|
if (length $1 < $in_comment) {
|
|
# indent level is less than comment level
|
|
# are back in code
|
|
$in_comment = 0;
|
|
} else {
|
|
# still in comments, don't use this line
|
|
next;
|
|
}
|
|
} elsif ($line =~ m{$regex}) {
|
|
if ($1) {
|
|
$in_comment = length($1) + 1; # number of leading spaces + 1
|
|
} else {
|
|
$in_comment = 1;
|
|
}
|
|
# print "in_comment=$in_comment\n";
|
|
next;
|
|
}
|
|
push @save_lines, $line;
|
|
}
|
|
|
|
print "<- remove_indented_block\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_haml_block { # {{{1
    # Haml block comments are defined by a silent comment marker like
    #    /
    # or
    #    -#
    # followed by indented text on subsequent lines.
    # http://haml.info/docs/yardoc/file.REFERENCE.html#comments
    my ($ra_lines, ) = @_;

    return remove_indented_block($ra_lines, '^(\s*)(/|-#)\s*$');

} # 1}}}
sub remove_pug_block { # {{{1
    # Pug (formerly Jade) block comments start with a comment marker like
    #    //
    # followed by indented text on subsequent lines.
    # http://jade-lang.com/reference/comments/
    my ($ra_lines, ) = @_;
    return remove_indented_block($ra_lines, '^(\s*)(//)\s*$');
} # 1}}}
sub remove_OCaml_comments { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
|
|
print "-> remove_OCaml_comments\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0; # counter to depth of nested comments
|
|
foreach my $L (@{$ra_lines}) {
|
|
next if $L =~ /^\s*$/;
|
|
# make an array of tokens where a token is a start comment
|
|
# marker, end comment marker, string, or anything else
|
|
my $clean_line = ""; # ie, free of comments
|
|
my @tokens = split(/(\(\*|\*\)|".*?")/, $L);
|
|
foreach my $t (@tokens) {
|
|
next unless $t;
|
|
if ($t eq "(*") {
|
|
++$in_comment;
|
|
} elsif ($t eq "*)") {
|
|
--$in_comment;
|
|
} elsif (!$in_comment) {
|
|
$clean_line .= $t;
|
|
}
|
|
}
|
|
push @save_lines, $clean_line if $clean_line;
|
|
}
|
|
print "<- remove_OCaml_comments\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_TLAPlus_generated_code { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# If --no-autogen, remove code created by the PlusCal translator.
|
|
return @{$ra_lines} if !$opt_no_autogen;
|
|
return remove_between_regex($ra_lines, '^\\\\\\* BEGIN TRANSLATION\b', '^\\\\\\* END TRANSLATION\b');
|
|
} # 1}}}
|
|
sub remove_TLAPlus_comments { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# TLA+ block comments are like OCaml comments (remove_OCaml_comments).
|
|
# However, TLA+ comments may contain PlusCal code, and we must consider the
|
|
# PlusCal code as code, not comments.
|
|
|
|
print "-> remove_TLAPlus_comments\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0; # counter to depth of nested (* *) comments
|
|
my $in_pluscal_head = 0; # whether we saw the start of PlusCal code
|
|
my $in_pluscal_block = 0; # counter to depth of nested { } blocks
|
|
my $saved_in_comment = 0; # $in_comment before we entered PlusCal code
|
|
foreach my $L (@{$ra_lines}) {
|
|
next if $L =~ /^\s*$/;
|
|
# make an array of interesting tokens we need to act upon
|
|
my $clean_line = ""; # ie, free of comments
|
|
my @tokens = split(/(\(\*|\*\)|".*?"|[{}]|--fair\b|--algorithm|PlusCal options)/, $L);
|
|
foreach my $t (@tokens) {
|
|
next unless $t;
|
|
if ($t eq "(*") {
|
|
++$in_comment;
|
|
} elsif ($t eq "*)") {
|
|
--$in_comment;
|
|
} elsif ($t eq "{" && $in_pluscal_head) {
|
|
# start block matching when we see '{' in '--algorithm NAME {'
|
|
$in_pluscal_head = 0;
|
|
$in_pluscal_block = 1;
|
|
$saved_in_comment = $in_comment;
|
|
$in_comment = 0;
|
|
$clean_line .= $t;
|
|
} elsif ($t eq "{" && $in_pluscal_block && !$in_comment) {
|
|
++$in_pluscal_block;
|
|
$clean_line .= $t;
|
|
} elsif ($t eq "}" && $in_pluscal_block && !$in_comment) {
|
|
--$in_pluscal_block;
|
|
if ($in_pluscal_block == 0) {
|
|
$in_comment = $saved_in_comment;
|
|
}
|
|
$clean_line .= $t;
|
|
} elsif ($t eq "--fair" || $t eq "--algorithm") {
|
|
$in_pluscal_head = 1;
|
|
$clean_line .= $t;
|
|
} elsif (!$in_comment || $in_pluscal_head || $in_pluscal_block || $t eq "PlusCal options") {
|
|
$clean_line .= $t;
|
|
}
|
|
}
|
|
push @save_lines, $clean_line if $clean_line;
|
|
}
|
|
print "<- remove_TLAPlus_comments\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub remove_slim_block { # {{{1
    # slim comments start with /
    # followed by indented text on subsequent lines.
    # http://www.rubydoc.info/gems/slim/frames
    my ($ra_lines, ) = @_;
    return remove_indented_block($ra_lines, '^(\s*)(/[^!])');
} # 1}}}
sub add_newlines { # {{{1
    my ($ra_lines, ) = @_;
    print "-> add_newlines \n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {

        push @save_lines, "$_\n";
    }

    print "<- add_newlines \n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub docstring_to_C { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Converts Python docstrings to C comments.
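# Example (illustrative): the Python line
#     """Module docstring."""
# becomes
#     /*Module docstring.*/
# so the later C-style comment filters can strip it, unless
# --docstring-as-code was given.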
|
|
|
|
if ($opt_docstring_as_code) {
|
|
return @{$ra_lines};
|
|
}
|
|
|
|
print "-> docstring_to_C()\n" if $opt_v > 2;
|
|
|
|
my $in_docstring = 0;
|
|
foreach (@{$ra_lines}) {
|
|
while (/((""")|('''))/) {
|
|
if (!$in_docstring) {
|
|
s{[uU]?((""")|('''))}{/*};
|
|
$in_docstring = 1;
|
|
} else {
|
|
s{((""")|('''))}{*/};
|
|
$in_docstring = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
print "<- docstring_to_C\n" if $opt_v > 2;
|
|
return @{$ra_lines};
|
|
} # 1}}}
|
|
sub docstring_rm_comments { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Remove embedded C/C++ style comments in docstrings.
|
|
|
|
print "-> docstring_rm_comments()\n" if $opt_v > 2;
|
|
|
|
my $in_docstring = 0;
|
|
foreach (@{$ra_lines}) {
|
|
if (/((""")|('''))(.*?)\1/) {
|
|
# single line docstring
|
|
my ($i_start, $i_end) = ($-[0]+3, $+[0]-3);
|
|
# replace /*, */, // with xx
|
|
substr($_, $i_start, $i_end-$i_start) =~ s{(/\*|\*/|//)}{xx}g;
|
|
next;
|
|
} elsif (m{/\*.*?((""")|(''')).*?\*/}) {
|
|
# docstring start or end within /* */ comments
|
|
my $i_start = $-[0]+2;
|
|
substr($_, $i_start, 3) = "xxx";
|
|
} elsif (m{//.*?((""")|('''))}) {
|
|
# docstring start or end after //
|
|
my $i_start = $-[0]+2;
|
|
substr($_, $i_start, 3) = "xxx";
|
|
} elsif (/^(.*?)((""")|('''))/ and $in_docstring) {
|
|
$in_docstring = 0;
|
|
my $i_end = length $1;
|
|
if ($i_end) {
|
|
substr($_, 0, $i_end) =~ s{(/\*|\*/|//)}{xx}g;
|
|
}
|
|
} elsif (/((""")|('''))(.*?)$/ and !$in_docstring) {
|
|
$in_docstring = 1;
|
|
my $i_start = $-[0]+3;
|
|
substr($_, $i_start) =~ s{(/\*|\*/|//)}{xx}g;
|
|
} elsif ($in_docstring) {
|
|
s{(/\*|\*/|//)}{xx}g;
|
|
}
|
|
}
|
|
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
print "<- docstring_rm_comments\n" if $opt_v > 2;
|
|
return @{$ra_lines};
|
|
} # 1}}}
|
|
sub jupyter_nb { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Translate .ipynb file content into an equivalent set of code
|
|
# lines as expected by cloc.
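# Sketch of the cell layout being parsed (illustrative, heavily trimmed
# .ipynb JSON):
#     "cell_type": "code",
#     ...
#     "source": [
#      "x = 1\n",
#      "\n",
#      "# a comment\n"
#     ]
# Only the "x = 1" entry survives; the bare "\n" entry and the "#"
# comment entry are skipped by the checks below.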
|
|
|
|
print "-> jupyter_nb()\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_code = 0;
|
|
my $in_source = 0;
|
|
foreach (@{$ra_lines}) {
|
|
if (!$in_code and !$in_source and /^\s*"cell_type":\s*"code",\s*$/) {
|
|
$in_code = 1;
|
|
} elsif ($in_code and !$in_source and /^\s*"source":\s*\[\s*$/) {
|
|
$in_source = 1;
|
|
} elsif ($in_code and $in_source) {
|
|
if (/^\s*"\s*\\n",\s*$/) { # "\n", -> empty line
|
|
next;
|
|
} elsif (/^\s*"\s*#/) { # comment within the code block
|
|
next;
|
|
} elsif (/^\s*\]\s*$/) {
|
|
$in_code = 0;
|
|
$in_source = 0;
|
|
} else {
|
|
push @save_lines, $_;
|
|
}
|
|
}
|
|
}
|
|
|
|
print "<- jupyter_nb\n" if $opt_v > 2;
|
|
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub elixir_doc_to_C { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Converts Elixir docs to C comments.
|
|
|
|
print "-> elixir_doc_to_C()\n" if $opt_v > 2;
|
|
|
|
my $in_docstring = 0;
|
|
foreach (@{$ra_lines}) {
|
|
if (!$in_docstring && /(\@(module)?doc\s+(~[sScC])?['"]{3})/) {
|
|
s{$1}{/*};
|
|
$in_docstring = 1;
|
|
} elsif ($in_docstring && /(['"]{3})/) {
|
|
s{$1}{*/};
|
|
$in_docstring = 0;
|
|
}
|
|
}
|
|
|
|
print "<- elixir_doc_to_C\n" if $opt_v > 2;
|
|
return @{$ra_lines};
|
|
} # 1}}}
|
|
sub Forth_paren_to_C { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Converts Forth comment parentheses to C comments.
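# Example (illustrative Forth source): in
#     : SQUARE ( n -- n*n ) DUP * ;
# the " ( n -- n*n ) " stack comment is matched by the in-line case
# below and removed outright; an unterminated " ( " is instead turned
# into "/*" so later C comment filters treat the rest as a block comment.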
|
|
|
|
print "-> Forth_paren_to_C()\n" if $opt_v > 2;
|
|
|
|
my $in_comment = 0;
|
|
my $max_paren_pair_per_line = 255;
|
|
foreach (@{$ra_lines}) {
|
|
#print "Forth_paren_to_C: [$_]\n";
|
|
my $n_iter = 0;
|
|
while (/\s\(\s/ or ($in_comment and /\)/)) {
|
|
#print "TOP n_iter=$n_iter in_comment=$in_comment\n";
|
|
if (/\s\(\s.*?\)/) {
|
|
# in-line parenthesis comment; handle here
|
|
s/\s+\(\s+.*?\)//g;
|
|
#print "B\n";
|
|
} elsif (!$in_comment and /\s\(\s/) {
|
|
s{\s+\(\s+}{/*};
|
|
#print "C\n";
|
|
$in_comment = 1;
|
|
} elsif ($in_comment and /\)/) {
|
|
s{\)}{*/};
|
|
#print "D\n";
|
|
$in_comment = 0;
|
|
} else {
|
|
# gets here if it can't find a matching
|
|
# close parenthesis; in this case the
|
|
# results will likely be incorrect
|
|
++$n_iter;
|
|
#print "E\n";
|
|
last if $n_iter > $max_paren_pair_per_line;
|
|
}
|
|
}
|
|
}
|
|
|
|
print "<- Forth_paren_to_C\n" if $opt_v > 2;
|
|
return @{$ra_lines};
|
|
} # 1}}}
|
|
sub powershell_to_C { # {{{1
|
|
my ($ra_lines, ) = @_;
|
|
# Converts PowerShell block comment markers to C comments.
|
|
|
|
print "-> powershell_to_C()\n" if $opt_v > 2;
|
|
|
|
my $in_docstring = 0;
|
|
foreach (@{$ra_lines}) {
|
|
s{<#}{/*}g;
|
|
s{#>}{*/}g;
|
|
}
|
|
|
|
print "<- powershell_to_C\n" if $opt_v > 2;
|
|
return @{$ra_lines};
|
|
} # 1}}}
|
|
sub smarty_to_C { # {{{1
    my ($ra_lines, ) = @_;
    # Converts Smarty comments to C comments.

    print "-> smarty_to_C()\n" if $opt_v > 2;

    foreach (@{$ra_lines}) {
        s[{\*][/*]g;
        s[\*}][*/]g;
    }

    print "<- smarty_to_C\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub determine_lit_type { # {{{1
    my ($file) = @_;

    my $FILE = open_file('<', $file, 0);
    while (<$FILE>) {
        if (m/^\\begin\{code\}/) { close $FILE; return 2; }
        if (m/^>\s/) { close $FILE; return 1; }
    }

    return 0;
} # 1}}}
sub remove_haskell_comments { # {{{1
|
|
# SLOCCount's haskell_count script with modifications to handle
|
|
# Elm empty and nested block comments.
|
|
# Strips out {- .. -} and -- comments and counts the rest.
|
|
# Pragmas, {-#...}, are counted as SLOC.
|
|
# BUG: Doesn't handle strings with embedded block comment markers gracefully.
|
|
# In practice, that shouldn't be a problem.
|
|
my ($ra_lines, $file, ) = @_;
|
|
|
|
print "-> remove_haskell_comments\n" if $opt_v > 2;
|
|
|
|
my @save_lines = ();
|
|
my $in_comment = 0;
|
|
my $incomment = 0;
|
|
my $inlitblock = 0;
|
|
my $literate = 0;
|
|
my $is_elm = 0;
|
|
|
|
$is_elm = 1 if $file =~ /\.elm$/;
|
|
$literate = 1 if $file =~ /\.lhs$/;
|
|
if ($literate) { $literate = determine_lit_type($file) }
|
|
|
|
foreach (@{$ra_lines}) {
|
|
chomp;
|
|
if ($literate == 1) {
|
|
if (!s/^>//) { s/.*//; }
|
|
} elsif ($literate == 2) {
|
|
if ($inlitblock) {
|
|
if (m/^\\end\{code\}/) { s/.*//; $inlitblock = 0; }
|
|
} elsif (!$inlitblock) {
|
|
if (m/^\\begin\{code\}/) { s/.*//; $inlitblock = 1; }
|
|
else { s/.*//; }
|
|
}
|
|
}
|
|
|
|
# keep pragmas
|
|
if (/^\s*{-#/) {
|
|
push @save_lines, $_;
|
|
next;
|
|
}
|
|
|
|
# Elm allows nested comments so track nesting depth
|
|
# with $incomment.
|
|
|
|
my $n_open = () = $_ =~ /{-/g;
|
|
my $n_close = () = $_ =~ /-}/g;
|
|
s/{-.*?-}//g;
|
|
|
|
if ($incomment) {
|
|
if (m/\-\}/) {
|
|
s/^.*?\-\}//;
|
|
if ($is_elm) {
|
|
$incomment += $n_open - $n_close;
|
|
} else {
|
|
$incomment = 0;
|
|
}
|
|
} else {
|
|
s/.*//;
|
|
}
|
|
} else {
|
|
s/--.*//;
|
|
if (m/{-/ && (!m/{-#/)) {
|
|
s/{-.*//;
|
|
if ($is_elm) {
|
|
$incomment += $n_open - $n_close;
|
|
} else {
|
|
$incomment = 1;
|
|
}
|
|
}
|
|
}
|
|
if (m/\S/) { push @save_lines, $_; }
|
|
}
|
|
# if ($incomment) {print "ERROR: ended in comment in $ARGV\n";}
|
|
|
|
print "<- remove_haskell_comments\n" if $opt_v > 2;
|
|
return @save_lines;
|
|
} # 1}}}
|
|
sub print_lines { # {{{1
    my ($file     , # in
        $title    , # in
        $ra_lines , # in
       ) = @_;
    printf "->%-30s %s\n", $file, $title;
    for (my $i = 0; $i < scalar @{$ra_lines}; $i++) {
        printf "%5d | %s", $i+1, $ra_lines->[$i];
        print "\n" unless $ra_lines->[$i] =~ m{\n$}
    }
} # 1}}}
sub set_constants { # {{{1
|
|
my ($rh_Language_by_Extension , # out
|
|
$rh_Language_by_Script , # out
|
|
$rh_Language_by_File , # out
|
|
$rh_Language_by_Prefix , # out
|
|
$rhaa_Filters_by_Language , # out
|
|
$rh_Not_Code_Extension , # out
|
|
$rh_Not_Code_Filename , # out
|
|
$rh_Scale_Factor , # out
|
|
$rh_Known_Binary_Archives , # out
|
|
$rh_EOL_continuation_re , # out
|
|
) = @_;
|
|
# 1}}}
|
|
%{$rh_Language_by_Extension} = ( # {{{1
|
|
'abap' => 'ABAP' ,
|
|
'ac' => 'm4' ,
|
|
'ada' => 'Ada' ,
|
|
'adb' => 'Ada' ,
|
|
'ads' => 'Ada' ,
|
|
'adso' => 'ADSO/IDSM' ,
|
|
'ahkl' => 'AutoHotkey' ,
|
|
'ahk' => 'AutoHotkey' ,
|
|
'agda' => 'Agda' ,
|
|
'lagda' => 'Agda' ,
|
|
'aj' => 'AspectJ' ,
|
|
'am' => 'make' ,
|
|
'ample' => 'AMPLE' ,
|
|
'apl' => 'APL' ,
|
|
'apla' => 'APL' ,
|
|
'aplf' => 'APL' ,
|
|
'aplo' => 'APL' ,
|
|
'apln' => 'APL' ,
|
|
'aplc' => 'APL' ,
|
|
'apli' => 'APL' ,
|
|
'applescript' => 'AppleScript' ,
|
|
'dyalog' => 'APL' ,
|
|
'dyapp' => 'APL' ,
|
|
'mipage' => 'APL' ,
|
|
'art' => 'Arturo' ,
|
|
'as' => 'ActionScript' ,
|
|
'adoc' => 'AsciiDoc' ,
|
|
'asciidoc' => 'AsciiDoc' ,
|
|
'dofile' => 'AMPLE' ,
|
|
'startup' => 'AMPLE' ,
|
|
'axd' => 'ASP' ,
|
|
'ashx' => 'ASP' ,
|
|
'asa' => 'ASP' ,
|
|
'asax' => 'ASP.NET' ,
|
|
'ascx' => 'ASP.NET' ,
|
|
'asd' => 'Lisp' , # system definition file
|
|
'asmx' => 'ASP.NET' ,
|
|
'asp' => 'ASP' ,
|
|
'aspx' => 'ASP.NET' ,
|
|
'master' => 'ASP.NET' ,
|
|
'sitemap' => 'ASP.NET' ,
|
|
'nasm' => 'Assembly' ,
|
|
'a51' => 'Assembly' ,
|
|
'asm' => 'Assembly' ,
|
|
'astro' => 'Astro' ,
|
|
'asy' => 'Asymptote' ,
|
|
'cshtml' => 'Razor' ,
|
|
'razor' => 'Razor' , # Client-side Blazor
|
|
'nawk' => 'awk' ,
|
|
'mawk' => 'awk' ,
|
|
'gawk' => 'awk' ,
|
|
'auk' => 'awk' ,
|
|
'awk' => 'awk' ,
|
|
'bash' => 'Bourne Again Shell' ,
|
|
'bazel' => 'Starlark' ,
|
|
'BUILD' => 'Bazel' ,
|
|
'dxl' => 'DOORS Extension Language',
|
|
'bat' => 'DOS Batch' ,
|
|
'BAT' => 'DOS Batch' ,
|
|
'cmd' => 'DOS Batch' ,
|
|
'CMD' => 'DOS Batch' ,
|
|
'btm' => 'DOS Batch' ,
|
|
'BTM' => 'DOS Batch' ,
|
|
'blade' => 'Blade' ,
|
|
'blade.php' => 'Blade' ,
|
|
'build.xml' => 'Ant' ,
|
|
'b' => 'Brainfuck' ,
|
|
'bf' => 'Brainfuck' ,
|
|
'brs' => 'BrightScript' ,
|
|
'bzl' => 'Starlark' ,
|
|
'btp' => 'BizTalk Pipeline' ,
|
|
'odx' => 'BizTalk Orchestration' ,
|
|
'carbon' => 'Carbon' ,
|
|
'cpy' => 'COBOL' ,
|
|
'cobol' => 'COBOL' ,
|
|
'ccp' => 'COBOL' ,
|
|
'cbl' => 'COBOL' ,
|
|
'CBL' => 'COBOL' ,
|
|
'idc' => 'C' ,
|
|
'cats' => 'C' ,
|
|
'c' => 'C' ,
|
|
'c++' => 'C++' ,
|
|
'C' => 'C++' ,
|
|
'cc' => 'C++' ,
|
|
'ccm' => 'C++' ,
|
|
'c++m' => 'C++' ,
|
|
'cppm' => 'C++' ,
|
|
'cxxm' => 'C++' ,
|
|
'h++' => 'C++' ,
|
|
'inl' => 'C++' ,
|
|
'ipp' => 'C++' ,
|
|
'ixx' => 'C++' ,
|
|
'tcc' => 'C++' ,
|
|
'tpp' => 'C++' ,
|
|
'ccs' => 'CCS' ,
|
|
'cfc' => 'ColdFusion CFScript' ,
|
|
'cfml' => 'ColdFusion' ,
|
|
'cfm' => 'ColdFusion' ,
|
|
'chpl' => 'Chapel' ,
|
|
'cl' => 'Lisp/OpenCL' ,
|
|
'riemann.config'=> 'Clojure' ,
|
|
'hic' => 'Clojure' ,
|
|
'cljx' => 'Clojure' ,
|
|
'cljscm' => 'Clojure' ,
|
|
'cljs.hl' => 'Clojure' ,
|
|
'cl2' => 'Clojure' ,
|
|
'boot' => 'Clojure' ,
|
|
'clj' => 'Clojure' ,
|
|
'cljs' => 'ClojureScript' ,
|
|
'cljc' => 'ClojureC' ,
|
|
'cls' => 'Visual Basic/TeX/Apex Class' ,
|
|
'cmake.in' => 'CMake' ,
|
|
'CMakeLists.txt' => 'CMake' ,
|
|
'cmake' => 'CMake' ,
|
|
'cob' => 'COBOL' ,
|
|
'COB' => 'COBOL' ,
|
|
'cocoa5' => 'CoCoA 5' ,
|
|
'c5' => 'CoCoA 5' ,
|
|
'cpkg5' => 'CoCoA 5' ,
|
|
'cocoa5server'=> 'CoCoA 5' ,
|
|
'iced' => 'CoffeeScript' ,
|
|
'cjsx' => 'CoffeeScript' ,
|
|
'cakefile' => 'CoffeeScript' ,
|
|
'_coffee' => 'CoffeeScript' ,
|
|
'coffee' => 'CoffeeScript' ,
|
|
'component' => 'Visualforce Component' ,
|
|
'cg3' => 'Constraint Grammar' ,
|
|
'rlx' => 'Constraint Grammar' ,
|
|
'Containerfile' => 'Containerfile' ,
|
|
'cpp' => 'C++' ,
|
|
'CPP' => 'C++' ,
|
|
'cr' => 'Crystal' ,
|
|
'cs' => 'C#/Smalltalk' ,
|
|
'designer.cs' => 'C# Designer' ,
|
|
'cake' => 'Cake Build Script' ,
|
|
'csh' => 'C Shell' ,
|
|
'cson' => 'CSON' ,
|
|
'css' => "CSS" ,
|
|
'csv' => "CSV" ,
|
|
'cu' => 'CUDA' ,
|
|
'cuh' => 'CUDA' , # CUDA header file
|
|
'cxx' => 'C++' ,
|
|
'd' => 'D/dtrace' ,
|
|
# in addition, .d can map to init.d files typically written as
|
|
# bash or sh scripts
|
|
'dfy' => 'Dafny' ,
|
|
'da' => 'DAL' ,
|
|
'dart' => 'Dart' ,
|
|
'dsc' => 'DenizenScript' ,
|
|
'derw' => 'Derw' ,
|
|
'def' => 'Windows Module Definition',
|
|
'dhall' => 'dhall' ,
|
|
'dt' => 'DIET' ,
|
|
'patch' => 'diff' ,
|
|
'diff' => 'diff' ,
|
|
'dmap' => 'NASTRAN DMAP' ,
|
|
'sthlp' => 'Stata' ,
|
|
'matah' => 'Stata' ,
|
|
'mata' => 'Stata' ,
|
|
'ihlp' => 'Stata' ,
|
|
'doh' => 'Stata' ,
|
|
'ado' => 'Stata' ,
|
|
'do' => 'Stata' ,
|
|
'DO' => 'Stata' ,
|
|
'Dockerfile' => 'Dockerfile' ,
|
|
'dockerfile' => 'Dockerfile' ,
|
|
'pascal' => 'Pascal' ,
|
|
'lpr' => 'Pascal' ,
|
|
'dfm' => 'Delphi Form' ,
|
|
'dpr' => 'Pascal' ,
|
|
'dita' => 'DITA' ,
|
|
'drl' => 'Drools' ,
|
|
'dtd' => 'DTD' ,
|
|
'ec' => 'C' ,
|
|
'ecpp' => 'ECPP' ,
|
|
'eex' => 'EEx' ,
|
|
'el' => 'Lisp' ,
|
|
'elm' => 'Elm' ,
|
|
'exs' => 'Elixir' ,
|
|
'ex' => 'Elixir' ,
|
|
'ecr' => 'Embedded Crystal' ,
|
|
'ejs' => 'EJS' ,
|
|
'erb' => 'ERB' ,
|
|
'ERB' => 'ERB' ,
|
|
'ets' => 'ArkTs' , # OpenHarmonyOS app language
|
|
'yrl' => 'Erlang' ,
|
|
'xrl' => 'Erlang' ,
|
|
'rebar.lock' => 'Erlang' ,
|
|
'rebar.config.lock'=> 'Erlang' ,
|
|
'rebar.config'=> 'Erlang' ,
|
|
'emakefile' => 'Erlang' ,
|
|
'app.src' => 'Erlang' ,
|
|
'erl' => 'Erlang' ,
|
|
'exp' => 'Expect' ,
|
|
'4th' => 'Forth' ,
|
|
'fish' => 'Fish Shell' ,
|
|
'fsl' => 'Finite State Language' ,
|
|
'jssm' => 'Finite State Language' ,
|
|
'fnl' => 'Fennel' ,
|
|
'forth' => 'Forth' ,
|
|
'fr' => 'Forth' ,
|
|
'frt' => 'Forth' ,
|
|
'fth' => 'Forth' ,
|
|
'f83' => 'Forth' ,
|
|
'fb' => 'Forth' ,
|
|
'fpm' => 'Forth' ,
|
|
'e4' => 'Forth' ,
|
|
'rx' => 'Forth' ,
|
|
'ft' => 'Forth' ,
|
|
'f77' => 'Fortran 77' ,
|
|
'F77' => 'Fortran 77' ,
|
|
'f90' => 'Fortran 90' ,
|
|
'F90' => 'Fortran 90' ,
|
|
'f95' => 'Fortran 95' ,
|
|
'F95' => 'Fortran 95' ,
|
|
'f' => 'Fortran 77/Forth' ,
|
|
'F' => 'Fortran 77' ,
|
|
'for' => 'Fortran 77/Forth' ,
|
|
'FOR' => 'Fortran 77' ,
|
|
'ftl' => 'Freemarker Template' ,
|
|
'ftn' => 'Fortran 77' ,
|
|
'FTN' => 'Fortran 77' ,
|
|
'fmt' => 'Oracle Forms' ,
|
|
'focexec' => 'Focus' ,
|
|
'fs' => 'F#/Forth' ,
|
|
'fsi' => 'F#' ,
|
|
'fsx' => 'F# Script' ,
|
|
'fut' => 'Futhark' ,
|
|
'fxml' => 'FXML' ,
|
|
'gnumakefile' => 'make' ,
|
|
'Gnumakefile' => 'make' ,
|
|
'gd' => 'GDScript' ,
|
|
'gdshader' => 'Godot Shaders' ,
|
|
'vshader' => 'GLSL' ,
|
|
'vsh' => 'GLSL' ,
|
|
'vrx' => 'GLSL' ,
|
|
'gshader' => 'GLSL' ,
|
|
'glslv' => 'GLSL' ,
|
|
'geo' => 'GLSL' ,
|
|
'fshader' => 'GLSL' ,
|
|
'fsh' => 'GLSL' ,
|
|
'frg' => 'GLSL' ,
|
|
'fp' => 'GLSL' ,
|
|
'fbs' => 'Flatbuffers' ,
|
|
'gjs' => 'Glimmer JavaScript' ,
|
|
'gts' => 'Glimmer TypeScript' ,
|
|
'glsl' => 'GLSL' ,
|
|
'graphqls' => 'GraphQL' ,
|
|
'gql' => 'GraphQL' ,
|
|
'graphql' => 'GraphQL' ,
|
|
'vert' => 'GLSL' ,
|
|
'tesc' => 'GLSL' ,
|
|
'tese' => 'GLSL' ,
|
|
'geom' => 'GLSL' ,
|
|
'feature' => 'Cucumber' ,
|
|
'frag' => 'GLSL' ,
|
|
'comp' => 'GLSL' ,
|
|
'g' => 'ANTLR Grammar' ,
|
|
'g4' => 'ANTLR Grammar' ,
|
|
'gleam' => 'Gleam' ,
|
|
'go' => 'Go' ,
|
|
'ʕ◔ϖ◔ʔ' => 'Go' ,
|
|
'gsp' => 'Grails' ,
|
|
'jenkinsfile' => 'Groovy' ,
|
|
'gvy' => 'Groovy' ,
|
|
'gtpl' => 'Groovy' ,
|
|
'grt' => 'Groovy' ,
|
|
'groovy' => 'Groovy' ,
|
|
'gant' => 'Groovy' ,
|
|
'gradle' => 'Gradle' ,
|
|
'gradle.kts' => 'Gradle' ,
|
|
'h' => 'C/C++ Header' ,
|
|
'H' => 'C/C++ Header' ,
|
|
'hh' => 'C/C++ Header' ,
|
|
'hpp' => 'C/C++ Header' ,
|
|
'hxx' => 'C/C++ Header' ,
|
|
'hb' => 'Harbour' ,
|
|
'hrl' => 'Erlang' ,
|
|
'hsc' => 'Haskell' ,
|
|
'hs' => 'Haskell' ,
|
|
'tfvars' => 'HCL' ,
|
|
'hcl' => 'HCL' ,
|
|
'tf' => 'HCL' ,
|
|
'nomad' => 'HCL' ,
|
|
'hlsli' => 'HLSL' ,
|
|
'fxh' => 'HLSL' ,
|
|
'hlsl' => 'HLSL' ,
|
|
'shader' => 'HLSL' ,
|
|
'cg' => 'HLSL' ,
|
|
'cginc' => 'HLSL' ,
|
|
'haml.deface' => 'Haml' ,
|
|
'haml' => 'Haml' ,
|
|
'handlebars' => 'Handlebars' ,
|
|
'hbs' => 'Handlebars' ,
|
|
'ha' => 'Hare' ,
|
|
'hxsl' => 'Haxe' ,
|
|
'hx' => 'Haxe' ,
|
|
'HC' => 'HolyC' ,
|
|
'hoon' => 'Hoon' ,
|
|
'xht' => 'HTML' ,
|
|
'html.hl' => 'HTML' ,
|
|
'htm' => 'HTML' ,
|
|
'html' => 'HTML' ,
|
|
'heex' => 'HTML EEx' ,
|
|
'i3' => 'Modula3' ,
|
|
'ice' => 'Slice' ,
|
|
'icl' => 'Clean' ,
|
|
'dcl' => 'Clean' ,
|
|
'dlm' => 'IDL' ,
|
|
'idl' => 'IDL' ,
|
|
'idr' => 'Idris' ,
|
|
'lidr' => 'Literate Idris' ,
|
|
'imba' => 'Imba' ,
|
|
'prefs' => 'INI' ,
|
|
'lektorproject'=> 'INI' ,
|
|
'buildozer.spec'=> 'INI' ,
|
|
'ini' => 'INI' ,
|
|
'editorconfig'=> 'INI' ,
|
|
'ism' => 'InstallShield' ,
|
|
'ipl' => 'IPL' ,
|
|
'pro' => 'IDL/Qt Project/Prolog/ProGuard' ,
|
|
'ig' => 'Modula3' ,
|
|
'il' => 'SKILL' ,
|
|
'ils' => 'SKILL++' ,
|
|
'inc' => 'PHP/Pascal/Fortran' ,
|
|
'ino' => 'Arduino Sketch' ,
|
|
'ipf' => 'Igor Pro' ,
|
|
#'pde' => 'Arduino Sketch' , # pre 1.0
|
|
'pde' => 'Processing' , # pre 1.0
|
|
'itk' => 'Tcl/Tk' ,
|
|
'java' => 'Java' ,
|
|
'jcl' => 'JCL' , # IBM Job Control Lang.
|
|
'jl' => 'Lisp/Julia' ,
|
|
'jai' => 'Jai' ,
|
|
'janet' => 'Janet' ,
|
|
'xsjslib' => 'JavaScript' ,
|
|
'xsjs' => 'JavaScript' ,
|
|
'ssjs' => 'JavaScript' ,
|
|
'sjs' => 'JavaScript' ,
|
|
'pac' => 'JavaScript' ,
|
|
'njs' => 'JavaScript' ,
|
|
'mjs' => 'JavaScript' ,
|
|
'cjs' => 'JavaScript' ,
|
|
'jss' => 'JavaScript' ,
|
|
'jsm' => 'JavaScript' ,
|
|
'jsfl' => 'JavaScript' ,
|
|
'jscad' => 'JavaScript' ,
|
|
'jsb' => 'JavaScript' ,
|
|
'jakefile' => 'JavaScript' ,
|
|
'jake' => 'JavaScript' ,
|
|
'bones' => 'JavaScript' ,
|
|
'_js' => 'JavaScript' ,
|
|
'js' => 'JavaScript' ,
|
|
'es6' => 'JavaScript' ,
|
|
'jsf' => 'JavaServer Faces' ,
|
|
'jsx' => 'JSX' ,
|
|
'xhtml' => 'XHTML' ,
|
|
'j2' => 'Jinja Template' ,
|
|
'jinja' => 'Jinja Template' ,
|
|
'jinja2' => 'Jinja Template' ,
|
|
'yyp' => 'JSON' ,
|
|
'webmanifest' => 'JSON' ,
|
|
'webapp' => 'JSON' ,
|
|
'topojson' => 'JSON' ,
|
|
'tfstate.backup'=> 'JSON' ,
|
|
'tfstate' => 'JSON' ,
|
|
'mcmod.info' => 'JSON' ,
|
|
'mcmeta' => 'JSON' ,
|
|
'json-tmlanguage'=> 'JSON' ,
|
|
'jsonl' => 'JSON' ,
|
|
'har' => 'JSON' ,
|
|
'gltf' => 'JSON' ,
|
|
'geojson' => 'JSON' ,
|
|
'composer.lock'=> 'JSON' ,
|
|
'avsc' => 'JSON' ,
|
|
'watchmanconfig'=> 'JSON' ,
|
|
'tern-project'=> 'JSON' ,
|
|
'tern-config' => 'JSON' ,
|
|
'htmlhintrc' => 'JSON' ,
|
|
'arcconfig' => 'JSON' ,
|
|
'json' => 'JSON' ,
|
|
'json5' => 'JSON5' ,
|
|
'jsp' => 'JSP' , # Java server pages
|
|
'jspf' => 'JSP' , # Java server pages
|
|
'junos' => 'Juniper Junos' ,
|
|
'vm' => 'Velocity Template Language' ,
|
|
'kv' => 'kvlang' ,
|
|
'ksc' => 'Kermit' ,
|
|
'ksh' => 'Korn Shell' ,
|
|
'ktm' => 'Kotlin' ,
|
|
'kt' => 'Kotlin' ,
|
|
'kts' => 'Kotlin' ,
|
|
'hlean' => 'Lean' ,
|
|
'lean' => 'Lean' ,
|
|
'lhs' => 'Haskell' ,
|
|
'lex' => 'lex' ,
|
|
'l' => 'lex' ,
|
|
'ld' => 'Linker Script' ,
|
|
'lem' => 'Lem' ,
|
|
'less' => 'LESS' ,
|
|
'lfe' => 'LFE' ,
|
|
'liquid' => 'liquid' ,
|
|
'lsp' => 'Lisp' ,
|
|
'lisp' => 'Lisp' ,
|
|
'll' => 'LLVM IR' ,
|
|
'lgt' => 'Logtalk' ,
|
|
'logtalk' => 'Logtalk' ,
|
|
'lp' => 'AnsProlog' , # Answer Set Programming / clingo
|
|
'wlua' => 'Lua' ,
|
|
'rbxs' => 'Lua' ,
|
|
'pd_lua' => 'Lua' ,
|
|
'p8' => 'Lua' ,
|
|
'nse' => 'Lua' ,
|
|
'lua' => 'Lua' ,
|
|
'luau' => 'Luau' ,
|
|
'm3' => 'Modula3' ,
|
|
'm4' => 'm4' ,
|
|
'makefile' => 'make' ,
|
|
'Makefile' => 'make' ,
|
|
'mao' => 'Mako' ,
|
|
'mako' => 'Mako' ,
|
|
'workbook' => 'Markdown' ,
|
|
'ronn' => 'Markdown' ,
|
|
'mkdown' => 'Markdown' ,
|
|
'mkdn' => 'Markdown' ,
|
|
'mkd' => 'Markdown' ,
|
|
'mdx' => 'Markdown' ,
|
|
'mdwn' => 'Markdown' ,
|
|
'mdown' => 'Markdown' ,
|
|
'markdown' => 'Markdown' ,
|
|
'contents.lr' => 'Markdown' ,
|
|
'md' => 'Markdown' ,
|
|
'mc' => 'Windows Message File' ,
|
|
'met' => 'Teamcenter met' ,
|
|
'mg' => 'Modula3' ,
|
|
'mojom' => 'Mojom' ,
|
|
'mojo' => 'Mojo' ,
|
|
'🔥' => 'Mojo' ,
|
|
'meson.build' => 'Meson' ,
|
|
'metal' => 'Metal' ,
|
|
'mk' => 'make' ,
|
|
# 'mli' => 'ML' , # ML not implemented
|
|
# 'ml' => 'ML' ,
|
|
'ml4' => 'OCaml' ,
|
|
'eliomi' => 'OCaml' ,
|
|
'eliom' => 'OCaml' ,
|
|
'ml' => 'OCaml' ,
|
|
'mli' => 'OCaml' ,
|
|
'mly' => 'OCaml' ,
|
|
'mll' => 'OCaml' ,
|
|
'm' => 'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' ,
|
|
'mm' => 'Objective-C++' ,
|
|
'msg' => 'Gencat NLS' ,
|
|
'nbp' => 'Mathematica' ,
|
|
'mathematica' => 'Mathematica' ,
|
|
'ma' => 'Mathematica' ,
|
|
'cdf' => 'Mathematica' ,
|
|
'mt' => 'Mathematica' ,
|
|
'wl' => 'Mathematica' ,
|
|
'wlt' => 'Mathematica' ,
|
|
'mo' => 'Modelica' ,
|
|
'mustache' => 'Mustache' ,
|
|
'wdproj' => 'MSBuild script' ,
|
|
'csproj' => 'MSBuild script' ,
|
|
'vcproj' => 'MSBuild script' ,
|
|
'wixproj' => 'MSBuild script' ,
|
|
'btproj' => 'MSBuild script' ,
|
|
'msbuild' => 'MSBuild script' ,
|
|
'sln' => 'Visual Studio Solution',
|
|
'mps' => 'MUMPS' ,
|
|
'mth' => 'Teamcenter mth' ,
|
|
'n' => 'Nemerle' ,
|
|
'nlogo' => 'NetLogo' ,
|
|
'nls' => 'NetLogo' ,
|
|
'ncl' => 'Nickel' ,
|
|
'nims' => 'Nim' ,
|
|
'nimrod' => 'Nim' ,
|
|
'nimble' => 'Nim' ,
|
|
'nim.cfg' => 'Nim' ,
|
|
'nim' => 'Nim' ,
|
|
'nix' => 'Nix' ,
|
|
'nut' => 'Squirrel' ,
|
|
'njk' => 'Nunjucks' ,
|
|
'odin' => 'Odin' ,
|
|
'oscript' => 'LiveLink OScript' ,
|
|
'bod' => 'Oracle PL/SQL' ,
|
|
'spc' => 'Oracle PL/SQL' ,
|
|
'fnc' => 'Oracle PL/SQL' ,
|
|
'prc' => 'Oracle PL/SQL' ,
|
|
'trg' => 'Oracle PL/SQL' ,
|
|
'pad' => 'Ada' , # Oracle Ada preprocessor
|
|
'page' => 'Visualforce Page' ,
|
|
'pas' => 'Pascal' ,
|
|
'pcc' => 'C++' , # Oracle C++ preprocessor
|
|
'rexfile' => 'Perl' ,
|
|
'psgi' => 'Perl' ,
|
|
'ph' => 'Perl' ,
|
|
'makefile.pl' => 'Perl' ,
|
|
'cpanfile' => 'Perl' ,
|
|
'al' => 'Perl' ,
|
|
'ack' => 'Perl' ,
|
|
'perl' => 'Perl' ,
|
|
'pfo' => 'Fortran 77' ,
|
|
'pgc' => 'C' , # Postgres embedded C/C++
|
|
'phpt' => 'PHP' ,
|
|
'phps' => 'PHP' ,
|
|
'phakefile' => 'PHP' ,
|
|
'ctp' => 'PHP' ,
|
|
'aw' => 'PHP' ,
|
|
'php_cs.dist' => 'PHP' ,
|
|
'php_cs' => 'PHP' ,
|
|
'php3' => 'PHP' ,
|
|
'php4' => 'PHP' ,
|
|
'php5' => 'PHP' ,
|
|
'php' => 'PHP' ,
|
|
'phtml' => 'PHP' ,
|
|
'pig' => 'Pig Latin' ,
|
|
'plh' => 'Perl' ,
|
|
'pl' => 'Perl/Prolog' ,
|
|
'PL' => 'Perl/Prolog' ,
|
|
'p6' => 'Raku/Prolog' ,
|
|
'P6' => 'Raku/Prolog' ,
|
|
'plx' => 'Perl' ,
|
|
'pm' => 'Perl' ,
|
|
'pm6' => 'Raku' ,
|
|
'raku' => 'Raku' ,
|
|
'rakumod' => 'Raku' ,
|
|
'pom.xml' => 'Maven' ,
|
|
'pom' => 'Maven' ,
|
|
'scad' => 'OpenSCAD' ,
|
|
'yap' => 'Prolog' ,
|
|
'prolog' => 'Prolog' ,
|
|
'P' => 'Prolog' ,
|
|
'p' => 'Pascal' ,
|
|
'pp' => 'Pascal/Puppet' ,
|
|
'viw' => 'SQL' ,
|
|
'udf' => 'SQL' ,
|
|
'tab' => 'SQL' ,
|
|
'mysql' => 'SQL' ,
|
|
'cql' => 'SQL' ,
|
|
'psql' => 'SQL' ,
|
|
'xpy' => 'Python' ,
|
|
'wsgi' => 'Python' ,
|
|
'wscript' => 'Python' ,
|
|
'workspace' => 'Python' ,
|
|
'tac' => 'Python' ,
|
|
'snakefile' => 'Python' ,
|
|
'sconstruct' => 'Python' ,
|
|
'sconscript' => 'Python' ,
|
|
'pyt' => 'Python' ,
|
|
'pyp' => 'Python' ,
|
|
'pyi' => 'Python' ,
|
|
'pyde' => 'Python' ,
|
|
'py3' => 'Python' ,
|
|
'lmi' => 'Python' ,
|
|
'gypi' => 'Python' ,
|
|
'gyp' => 'Python' ,
|
|
'build.bazel' => 'Python' ,
|
|
'buck' => 'Python' ,
|
|
'gclient' => 'Python' ,
|
|
'py' => 'Python' ,
|
|
'pyw' => 'Python' ,
|
|
'ipynb' => 'Jupyter Notebook' ,
|
|
'pyj' => 'RapydScript' ,
|
|
'pxi' => 'Cython' ,
|
|
'pxd' => 'Cython' ,
|
|
'pyx' => 'Cython' ,
|
|
'qbs' => 'QML' ,
|
|
'qml' => 'QML' ,
|
|
'watchr' => 'Ruby' ,
|
|
'vagrantfile' => 'Ruby' ,
|
|
'thorfile' => 'Ruby' ,
|
|
'thor' => 'Ruby' ,
|
|
'snapfile' => 'Ruby' ,
|
|
'ru' => 'Ruby' ,
|
|
'rbx' => 'Ruby' ,
|
|
'rbw' => 'Ruby' ,
|
|
'rbuild' => 'Ruby' ,
|
|
'rabl' => 'Ruby' ,
|
|
'puppetfile' => 'Ruby' ,
|
|
'podfile' => 'Ruby' ,
|
|
'mspec' => 'Ruby' ,
|
|
'mavenfile' => 'Ruby' ,
|
|
'jbuilder' => 'Ruby' ,
|
|
'jarfile' => 'Ruby' ,
|
|
'guardfile' => 'Ruby' ,
|
|
'god' => 'Ruby' ,
|
|
'gemspec' => 'Ruby' ,
|
|
'gemfile.lock'=> 'Ruby' ,
|
|
'gemfile' => 'Ruby' ,
|
|
'fastfile' => 'Ruby' ,
|
|
'eye' => 'Ruby' ,
|
|
'deliverfile' => 'Ruby' ,
|
|
'dangerfile' => 'Ruby' ,
|
|
'capfile' => 'Ruby' ,
|
|
'buildfile' => 'Ruby' ,
|
|
'builder' => 'Ruby' ,
|
|
'brewfile' => 'Ruby' ,
|
|
'berksfile' => 'Ruby' ,
|
|
'appraisals' => 'Ruby' ,
|
|
'pryrc' => 'Ruby' ,
|
|
'irbrc' => 'Ruby' ,
|
|
'rb' => 'Ruby' ,
|
|
'podspec' => 'Ruby' ,
|
|
'rake' => 'Ruby' ,
|
|
# 'resx' => 'ASP.NET' ,
|
|
'rex' => 'Oracle Reports' ,
|
|
'pprx' => 'Rexx' ,
|
|
'rexx' => 'Rexx' ,
|
|
'rhtml' => 'Ruby HTML' ,
|
|
'circom' => 'Circom' ,
|
|
'cairo' => 'Cairo' ,
|
|
'rs.in' => 'Rust' ,
|
|
'rs' => 'Rust' ,
|
|
'rst.txt' => 'reStructuredText' ,
|
|
'rest.txt' => 'reStructuredText' ,
|
|
'rest' => 'reStructuredText' ,
|
|
'rst' => 'reStructuredText' ,
|
|
's' => 'Assembly' ,
|
|
'S' => 'Assembly' ,
|
|
'SCA' => 'Visual Fox Pro' ,
|
|
'sca' => 'Visual Fox Pro' ,
|
|
'sbt' => 'Scala' ,
|
|
'kojo' => 'Scala' ,
|
|
'scala' => 'Scala' ,
|
|
'sbl' => 'Softbridge Basic' ,
|
|
'SBL' => 'Softbridge Basic' ,
|
|
'sed' => 'sed' ,
|
|
'sp' => 'SparForte' ,
|
|
'sol' => 'Solidity' ,
|
|
'p4' => 'P4' ,
|
|
'ses' => 'Patran Command Language' ,
|
|
'pcl' => 'Patran Command Language' ,
|
|
'peg' => 'PEG' ,
|
|
'pegjs' => 'peg.js' ,
|
|
'peggy' => 'peggy' ,
|
|
'pest' => 'Pest' ,
|
|
'prisma' => 'Prisma Schema' ,
|
|
'tspeg' => 'tspeg' ,
|
|
'jspeg' => 'tspeg' ,
|
|
'pl1' => 'PL/I' ,
|
|
'plm' => 'PL/M' ,
|
|
'lit' => 'PL/M' ,
|
|
'iuml' => 'PlantUML' ,
|
|
'pu' => 'PlantUML' ,
|
|
'puml' => 'PlantUML' ,
|
|
'plantuml' => 'PlantUML' ,
|
|
'wsd' => 'PlantUML' ,
|
|
'properties' => 'Properties' ,
|
|
'po' => 'PO File' ,
|
|
'pony' => 'Pony' ,
|
|
'pbt' => 'PowerBuilder' ,
|
|
'sra' => 'PowerBuilder' ,
|
|
'srf' => 'PowerBuilder' ,
|
|
'srm' => 'PowerBuilder' ,
|
|
'srs' => 'PowerBuilder' ,
|
|
'sru' => 'PowerBuilder' ,
|
|
'srw' => 'PowerBuilder' ,
|
|
'jade' => 'Pug' ,
|
|
'pug' => 'Pug' ,
|
|
'purs' => 'PureScript' ,
|
|
'prefab' => 'Unity-Prefab' ,
|
|
'proto' => 'Protocol Buffers' ,
|
|
'mat' => 'Unity-Prefab' ,
|
|
'ps1' => 'PowerShell' ,
|
|
'psd1' => 'PowerShell' ,
|
|
'psm1' => 'PowerShell' ,
|
|
'prql' => 'PRQL' ,
|
|
'rsx' => 'R' ,
|
|
'rd' => 'R' ,
|
|
'expr-dist' => 'R' ,
|
|
'rprofile' => 'R' ,
|
|
'R' => 'R' ,
|
|
'r' => 'R' ,
|
|
'raml' => 'RAML' ,
|
|
'ring' => 'Ring' ,
|
|
'rh' => 'Ring' ,
|
|
'rform' => 'Ring' ,
|
|
'rktd' => 'Racket' ,
|
|
'rkt' => 'Racket' ,
|
|
'rktl' => 'Racket' ,
|
|
'Rmd' => 'Rmd' ,
|
|
're' => 'ReasonML' ,
|
|
'rei' => 'ReasonML' ,
|
|
'res' => 'ReScript' ,
|
|
'resi' => 'ReScript' ,
|
|
'scrbl' => 'Racket' ,
|
|
'sps' => 'Scheme' ,
|
|
'sc' => 'Scheme' ,
|
|
'ss' => 'Scheme' ,
|
|
'scm' => 'Scheme' ,
|
|
'sch' => 'Scheme' ,
|
|
'sls' => 'Scheme/SaltStack' ,
|
|
'sld' => 'Scheme' ,
|
|
'robot' => 'RobotFramework' ,
|
|
'rc' => 'Windows Resource File' ,
|
|
'rc2' => 'Windows Resource File' ,
|
|
'sas' => 'SAS' ,
|
|
'sass' => 'Sass' ,
|
|
'scss' => 'SCSS' ,
|
|
'sh' => 'Bourne Shell' ,
|
|
'smarty' => 'Smarty' ,
|
|
'sml' => 'Standard ML' ,
|
|
'sig' => 'Standard ML' ,
|
|
'fun' => 'Standard ML' ,
|
|
'slim' => 'Slim' ,
|
|
'e' => 'Specman e' ,
|
|
'sql' => 'SQL' ,
|
|
'SQL' => 'SQL' ,
|
|
'sproc.sql' => 'SQL Stored Procedure' ,
|
|
'spoc.sql' => 'SQL Stored Procedure' ,
|
|
'spc.sql' => 'SQL Stored Procedure' ,
|
|
'udf.sql' => 'SQL Stored Procedure' ,
|
|
'data.sql' => 'SQL Data' ,
|
|
'sss' => 'SugarSS' ,
|
|
'slint' => 'Slint' ,
|
|
'st' => 'Smalltalk' ,
|
|
'rules' => 'Snakemake' ,
|
|
'smk' => 'Snakemake' ,
|
|
'styl' => 'Stylus' ,
|
|
'i' => 'SWIG' ,
|
|
'svelte' => 'Svelte' ,
|
|
'sv' => 'Verilog-SystemVerilog' ,
|
|
'svh' => 'Verilog-SystemVerilog' ,
|
|
'svg' => 'SVG' ,
|
|
'SVG' => 'SVG' ,
|
|
'v' => 'Verilog-SystemVerilog/Coq' ,
|
|
'td' => 'TableGen' ,
|
|
'tcl' => 'Tcl/Tk' ,
|
|
'tcsh' => 'C Shell' ,
|
|
'tk' => 'Tcl/Tk' ,
|
|
'teal' => 'TEAL' ,
|
|
'mkvi' => 'TeX' ,
|
|
'mkiv' => 'TeX' ,
|
|
'mkii' => 'TeX' ,
|
|
'ltx' => 'TeX' ,
|
|
'lbx' => 'TeX' ,
|
|
'ins' => 'TeX' ,
|
|
'cbx' => 'TeX' ,
|
|
'bib' => 'TeX' ,
|
|
'bbx' => 'TeX' ,
|
|
'aux' => 'TeX' ,
|
|
'tex' => 'TeX' , # TeX, LaTex, MikTex, ..
|
|
'toml' => 'TOML' ,
|
|
'sty' => 'TeX' ,
|
|
# 'cls' => 'TeX' ,
|
|
'dtx' => 'TeX' ,
|
|
'bst' => 'TeX' ,
|
|
'txt' => 'Text' ,
|
|
'text' => 'Text' ,
|
|
'tres' => 'Godot Resource' ,
|
|
'tscn' => 'Godot Scene' ,
|
|
'thrift' => 'Thrift' ,
|
|
'tla' => 'TLA+' ,
|
|
'tpl' => 'Smarty' ,
|
|
'trigger' => 'Apex Trigger' ,
|
|
'ttcn' => 'TTCN' ,
|
|
'ttcn2' => 'TTCN' ,
|
|
'ttcn3' => 'TTCN' ,
|
|
'ttcnpp' => 'TTCN' ,
|
|
'sdl' => 'TNSDL' ,
|
|
'ssc' => 'TNSDL' ,
|
|
'sdt' => 'TNSDL' ,
|
|
'spd' => 'TNSDL' ,
|
|
'sst' => 'TNSDL' ,
|
|
'rou' => 'TNSDL' ,
|
|
'cin' => 'TNSDL' ,
|
|
'cii' => 'TNSDL' ,
|
|
'interface' => 'TNSDL' ,
|
|
'in1' => 'TNSDL' ,
|
|
'in2' => 'TNSDL' ,
|
|
'in3' => 'TNSDL' ,
|
|
'in4' => 'TNSDL' ,
|
|
'inf' => 'TNSDL' ,
|
|
'tpd' => 'TITAN Project File Information',
|
|
'ts' => 'TypeScript/Qt Linguist',
|
|
'mts' => 'TypeScript' ,
|
|
'tsx' => 'TypeScript' ,
|
|
'tss' => 'Titanium Style Sheet' ,
|
|
'twig' => 'Twig' ,
|
|
'typ' => 'Typst' ,
|
|
'um' => 'Umka' ,
|
|
'ui' => 'Qt/Glade' ,
|
|
'glade' => 'Glade' ,
|
|
'vala' => 'Vala' ,
|
|
'vapi' => 'Vala Header' ,
|
|
'vhw' => 'VHDL' ,
|
|
'vht' => 'VHDL' ,
|
|
'vhs' => 'VHDL' ,
|
|
'vho' => 'VHDL' ,
|
|
'vhi' => 'VHDL' ,
|
|
'vhf' => 'VHDL' ,
|
|
'vhd' => 'VHDL' ,
|
|
'VHD' => 'VHDL' ,
|
|
'vhdl' => 'VHDL' ,
|
|
'VHDL' => 'VHDL' ,
|
|
'bas' => 'Visual Basic' ,
|
|
'BAS' => 'Visual Basic' ,
|
|
'ctl' => 'Visual Basic' ,
|
|
'dsr' => 'Visual Basic' ,
|
|
'frm' => 'Visual Basic' ,
|
|
'frx' => 'Visual Basic' ,
|
|
'FRX' => 'Visual Basic' ,
|
|
'vba' => 'VB for Applications' ,
|
|
'VBA' => 'VB for Applications' ,
|
|
'vbhtml' => 'Visual Basic' ,
|
|
'VBHTML' => 'Visual Basic' ,
|
|
'vbproj' => 'Visual Basic .NET' ,
|
|
'vbp' => 'Visual Basic' , # .vbp - autogenerated
|
|
'vbs' => 'Visual Basic Script' ,
|
|
'VBS' => 'Visual Basic Script' ,
|
|
'vb' => 'Visual Basic .NET' ,
|
|
'VB' => 'Visual Basic .NET' ,
|
|
'vbw' => 'Visual Basic' , # .vbw - autogenerated
|
|
'vue' => 'Vuejs Component' ,
|
|
'vy' => 'Vyper' ,
|
|
'webinfo' => 'ASP.NET' ,
|
|
'wsdl' => 'Web Services Description',
|
|
'x' => 'Logos' ,
|
|
'xm' => 'Logos' ,
|
|
'xpo' => 'X++' , # Microsoft Dynamics AX 4.0 export format
|
|
'xmi' => 'XMI' ,
|
|
'XMI' => 'XMI' ,
|
|
'zcml' => 'XML' ,
|
|
'xul' => 'XML' ,
|
|
'xspec' => 'XML' ,
|
|
'xproj' => 'XML' ,
|
|
'xml.dist' => 'XML' ,
|
|
'xliff' => 'XML' ,
|
|
'xlf' => 'XML' ,
|
|
'xib' => 'XML' ,
|
|
'xacro' => 'XML' ,
|
|
'x3d' => 'XML' ,
|
|
'wsf' => 'XML' ,
|
|
'web.release.config'=> 'XML' ,
|
|
'web.debug.config'=> 'XML' ,
|
|
'web.config' => 'XML' ,
|
|
'wxml' => 'WXML' ,
|
|
'wxss' => 'WXSS' ,
|
|
'vxml' => 'XML' ,
|
|
'vstemplate' => 'XML' ,
|
|
'vssettings' => 'XML' ,
|
|
'vsixmanifest'=> 'XML' ,
|
|
'vcxproj' => 'XML' ,
|
|
'ux' => 'XML' ,
|
|
'urdf' => 'XML' ,
|
|
'tmtheme' => 'XML' ,
|
|
'tmsnippet' => 'XML' ,
|
|
'tmpreferences'=> 'XML' ,
|
|
'tmlanguage' => 'XML' ,
|
|
'tml' => 'XML' ,
|
|
'tmcommand' => 'XML' ,
|
|
'targets' => 'XML' ,
|
|
'sublime-snippet'=> 'XML' ,
|
|
'sttheme' => 'XML' ,
|
|
'storyboard' => 'XML' ,
|
|
'srdf' => 'XML' ,
|
|
'shproj' => 'XML' ,
|
|
'sfproj' => 'XML' ,
|
|
'settings.stylecop'=> 'XML' ,
|
|
'scxml' => 'XML' ,
|
|
'rss' => 'XML' ,
|
|
'resx' => 'XML' ,
|
|
'rdf' => 'XML' ,
|
|
'pt' => 'XML' ,
|
|
'psc1' => 'XML' ,
|
|
'ps1xml' => 'XML' ,
|
|
'props' => 'XML' ,
|
|
'proj' => 'XML' ,
|
|
'plist' => 'XML' ,
|
|
'pkgproj' => 'XML' ,
|
|
'packages.config'=> 'XML' ,
|
|
'osm' => 'XML' ,
|
|
'odd' => 'XML' ,
|
|
'nuspec' => 'XML' ,
|
|
'nuget.config'=> 'XML' ,
|
|
'nproj' => 'XML' ,
|
|
'ndproj' => 'XML' ,
|
|
'natvis' => 'XML' ,
|
|
'mjml' => 'XML' ,
|
|
'mdpolicy' => 'XML' ,
|
|
'launch' => 'XML' ,
|
|
'kml' => 'XML' ,
|
|
'jsproj' => 'XML' ,
|
|
'jelly' => 'XML' ,
|
|
'ivy' => 'XML' ,
|
|
'iml' => 'XML' ,
|
|
'grxml' => 'XML' ,
|
|
'gmx' => 'XML' ,
|
|
'fsproj' => 'XML' ,
|
|
'filters' => 'XML' ,
|
|
'dotsettings' => 'XML' ,
|
|
'dll.config' => 'XML' ,
|
|
'ditaval' => 'XML' ,
|
|
'ditamap' => 'XML' ,
|
|
'depproj' => 'XML' ,
|
|
'ct' => 'XML' ,
|
|
'csl' => 'XML' ,
|
|
'csdef' => 'XML' ,
|
|
'cscfg' => 'XML' ,
|
|
'cproject' => 'XML' ,
|
|
'clixml' => 'XML' ,
|
|
'ccxml' => 'XML' ,
|
|
'ccproj' => 'XML' ,
|
|
'builds' => 'XML' ,
|
|
'axml' => 'XML' ,
|
|
'app.config' => 'XML' ,
|
|
'ant' => 'XML' ,
|
|
'admx' => 'XML' ,
|
|
'adml' => 'XML' ,
|
|
'project' => 'XML' ,
|
|
'classpath' => 'XML' ,
|
|
'xml' => 'XML' ,
|
|
'XML' => 'XML' ,
|
|
'mxml' => 'MXML' ,
|
|
'xml.builder' => 'builder' ,
|
|
'build' => 'NAnt script' ,
|
|
'vim' => 'vim script' ,
|
|
'swift' => 'Swift' ,
|
|
'xaml' => 'XAML' ,
|
|
'wast' => 'WebAssembly' ,
|
|
'wat' => 'WebAssembly' ,
|
|
'wgsl' => 'WGSL' ,
|
|
'wxs' => 'WiX source' ,
|
|
'wxi' => 'WiX include' ,
|
|
'wxl' => 'WiX string localization' ,
|
|
'prw' => 'xBase' ,
|
|
'prg' => 'xBase' ,
|
|
'ch' => 'xBase Header' ,
|
|
'xqy' => 'XQuery' ,
|
|
'xqm' => 'XQuery' ,
|
|
'xql' => 'XQuery' ,
|
|
'xq' => 'XQuery' ,
|
|
'xquery' => 'XQuery' ,
|
|
'xsd' => 'XSD' ,
|
|
'XSD' => 'XSD' ,
|
|
'xslt' => 'XSLT' ,
|
|
'XSLT' => 'XSLT' ,
|
|
'xsl' => 'XSLT' ,
|
|
'XSL' => 'XSLT' ,
|
|
'xtend' => 'Xtend' ,
|
|
'yacc' => 'yacc' ,
|
|
'y' => 'yacc' ,
|
|
'yml.mysql' => 'YAML' ,
|
|
'yaml-tmlanguage'=> 'YAML' ,
|
|
'syntax' => 'YAML' ,
|
|
'sublime-syntax'=> 'YAML' ,
|
|
'rviz' => 'YAML' ,
|
|
'reek' => 'YAML' ,
|
|
'mir' => 'YAML' ,
|
|
'glide.lock' => 'YAML' ,
|
|
'gemrc' => 'YAML' ,
|
|
'clang-tidy' => 'YAML' ,
|
|
'clang-format'=> 'YAML' ,
|
|
'yaml' => 'YAML' ,
|
|
'yml' => 'YAML' ,
|
|
'zig' => 'Zig' ,
|
|
'zsh' => 'zsh' ,
|
|
);
# 1}}}
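# Several extensions above intentionally map to compound names such as
# 'Perl/Prolog' or 'MATLAB/Mathematica/Objective-C/MUMPS/Mercury'.  These
# appear to act as placeholders that are resolved per file (presumably by
# examining the file contents) before counting, which is also why the
# matching entries in %{$rhaa_Filters_by_Language} below are bare 'die'
# filters marked "never called".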
%{$rh_Language_by_Script}    = (             # {{{1
    'awk'       => 'awk'                  ,
    'bash'      => 'Bourne Again Shell'   ,
    'bc'        => 'bc'                   , # calculator
    'crystal'   => 'Crystal'              ,
    'csh'       => 'C Shell'              ,
    'dmd'       => 'D'                    ,
    'dtrace'    => 'dtrace'               ,
    'escript'   => 'Erlang'               ,
    'groovy'    => 'Groovy'               ,
    'idl'       => 'IDL'                  ,
    'kermit'    => 'Kermit'               ,
    'ksh'       => 'Korn Shell'           ,
    'lua'       => 'Lua'                  ,
    'luau'      => 'Luau'                 ,
    'make'      => 'make'                 ,
    'octave'    => 'Octave'               ,
    'perl5'     => 'Perl'                 ,
    'perl'      => 'Perl'                 ,
    'miniperl'  => 'Perl'                 ,
    'php'       => 'PHP'                  ,
    'php5'      => 'PHP'                  ,
    'python'    => 'Python'               ,
    'python2.6' => 'Python'               ,
    'python2.7' => 'Python'               ,
    'python3'   => 'Python'               ,
    'python3.3' => 'Python'               ,
    'python3.4' => 'Python'               ,
    'python3.5' => 'Python'               ,
    'python3.6' => 'Python'               ,
    'python3.7' => 'Python'               ,
    'python3.8' => 'Python'               ,
    'perl6'     => 'Raku'                 ,
    'raku'      => 'Raku'                 ,
    'rakudo'    => 'Raku'                 ,
    'rexx'      => 'Rexx'                 ,
    'regina'    => 'Rexx'                 ,
    'ruby'      => 'Ruby'                 ,
    'sed'       => 'sed'                  ,
    'sh'        => 'Bourne Shell'         ,
    'swipl'     => 'Prolog'               ,
    'tcl'       => 'Tcl/Tk'               ,
    'tclsh'     => 'Tcl/Tk'               ,
    'tcsh'      => 'C Shell'              ,
    'wish'      => 'Tcl/Tk'               ,
    'zsh'       => 'zsh'                  ,
    );
# 1}}}
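# The keys of %{$rh_Language_by_Script} are interpreter names, which
# suggests extension-less scripts are classified from their "#!" line; for
# instance a script beginning with "#!/usr/bin/env python3" would be counted
# as Python (hypothetical example).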
%{$rh_Language_by_File}      = (             # {{{1
    'build.xml'         => 'Ant/XML'         ,
    'BUILD'             => 'Bazel'           ,
    'WORKSPACE'         => 'Bazel'           ,
    'cmakelists.txt'    => 'CMake'           ,
    'CMakeLists.txt'    => 'CMake'           ,
    'Jamfile'           => 'Jam'             ,
    'jamfile'           => 'Jam'             ,
    'Jamrules'          => 'Jam'             ,
    'Makefile'          => 'make'            ,
    'makefile'          => 'make'            ,
    'meson.build'       => 'Meson'           ,
    'Gnumakefile'       => 'make'            ,
    'gnumakefile'       => 'make'            ,
    'pom.xml'           => 'Maven/XML'       ,
    'Rakefile'          => 'Ruby'            ,
    'rakefile'          => 'Ruby'            ,
    'Snakefile'         => 'Snakemake'       ,
    'Dockerfile'        => 'Dockerfile'      ,
    'Dockerfile.m4'     => 'Dockerfile'      ,
    'Dockerfile.cmake'  => 'Dockerfile'      ,
    'dockerfile'        => 'Dockerfile'      ,
    'dockerfile.m4'     => 'Dockerfile'      ,
    'dockerfile.cmake'  => 'Dockerfile'      ,
    'Containerfile'     => 'Containerfile'   ,
    );
# 1}}}
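# These exact-basename matches cover build files whose names are more
# telling than their extensions (or that have no extension at all), e.g.
# "CMakeLists.txt" is counted as CMake rather than plain Text, and
# "Makefile" as make.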
%{$rh_Language_by_Prefix}    = (             # {{{1
    'Dockerfile'        => 'Dockerfile'      ,
    'Containerfile'     => 'Containerfile'   ,
    );
# 1}}}
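# Prefix matching presumably exists to catch named variants such as
# "Dockerfile.dev" or "Containerfile.build" that are not listed verbatim in
# %{$rh_Language_by_File} above (the example names are hypothetical).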
%{$rhaa_Filters_by_Language} = (             # {{{1
    '(unknown)'          => [ ],
    'ABAP'               => [   [ 'remove_matches'      , '^\*'    ], ],
    'ActionScript'       => [
                                [ 'rm_comments_in_strings', '"', '/*', '*/' ],
                                [ 'rm_comments_in_strings', '"', '//', '' ],
                                [ 'call_regexp_common'  , 'C++'    ],
                            ],
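    # Each entry in this hash is an ordered list of filters of the form
    # [ subroutine_name, optional arguments... ], apparently applied to a
    # file's lines in sequence.  The common C-like pattern seen above first
    # masks comment markers that occur inside string literals
    # ('rm_comments_in_strings') and then strips //- and /* */-style
    # comments ('call_regexp_common' with the 'C++' pattern).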
|
|
'Apex Class' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'AppleScript' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'rm_comments_in_strings', '"', '(*', '*)' ],
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'ASP' => [ [ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
'ASP.NET' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_between_general', '<%--', '--%>' ],
|
|
[ 'remove_between_general', '<!--', '-->' ],
|
|
],
|
|
'Ada' => [ [ 'remove_matches' , '^\s*--' ], ],
|
|
'ADSO/IDSM' => [ [ 'remove_matches' , '^\s*\*[\+\!]' ], ],
|
|
'Agda' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
'AMPLE' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
'APL' => [
|
|
[ 'remove_matches' , '^\s*⍝' ],
|
|
],
|
|
'AnsProlog' => [
|
|
[ 'remove_between_general', '%*', '*%' ],
|
|
[ 'remove_matches' , '^\s*\%' ],
|
|
[ 'remove_inline' , '(//|\%).*$' ],
|
|
],
|
|
'Ant/XML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'ANTLR Grammar' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Ant' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Apex Trigger' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Arduino Sketch' => [ # Arduino IDE inserts problematic 0xA0 characters; strip them
|
|
[ 'replace_regex' , '\xa0', " " ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'ArkTs' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Arturo' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'AsciiDoc' => [
|
|
[ 'remove_between_general', '////', '////' ],
|
|
[ 'remove_matches' , '^\s*\/\/' ],
|
|
],
|
|
'AspectJ' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Assembly' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_matches' , '^\s*\@' ],
|
|
[ 'remove_matches' , '^\s*\|' ],
|
|
[ 'remove_matches' , '^\s*!' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
[ 'remove_inline' , '\@.*$' ],
|
|
[ 'remove_inline' , '\|.*$' ],
|
|
[ 'remove_inline' , '!.*$' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
[ 'remove_matches' , '^\*' ], # z/OS Assembly
|
|
],
|
|
'Astro' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Asymptote' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'AutoHotkey' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'awk' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Bazel' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'bc' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Blade' => [
|
|
[ 'remove_between_general', '{{--', '--}}' ],
|
|
[ 'remove_html_comments', ],
|
|
],
|
|
'Bourne Again Shell' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Bourne Shell' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Brainfuck' => [ # puerile name for a language
|
|
# [ 'call_regexp_common' , 'Brainfuck' ], # inaccurate
|
|
[ 'remove_bf_comments', ],
|
|
],
|
|
'BrightScript' => [
|
|
[ 'remove_matches' , '^\s*rem', ],
|
|
[ 'remove_matches' , '^\s*\'', ],
|
|
],
|
|
'builder' => [
|
|
[ 'remove_matches' , '^\s*xml_markup.comment!' ],
|
|
],
|
|
'C' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Chapel' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'C++' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'C/C++ Header' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Carbon' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Clean' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Clojure' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_matches' , '^\s*#_' ],
|
|
],
|
|
'ClojureScript' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
'ClojureC' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
'CMake' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Crystal' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Constraint Grammar' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'CUDA' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Cython' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'C#/Smalltalk' => [ [ 'die' , ], ], # never called
|
|
'C#' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'C# Designer' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'C# Generated' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Cake Build Script' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'CCS' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'CSS' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'CSV' => [ # comma separated value files have no comments;
|
|
[ 'remove_matches' , '^\s*$' ],
|
|
                            ], # included simply to allow diffs
|
|
'COBOL' => [ [ 'remove_cobol_comments', ], ],
|
|
'CoCoA 5' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'CoffeeScript' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'ColdFusion' => [ [ 'remove_html_comments', ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'ColdFusion CFScript'=> [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Containerfile' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Coq' => [
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
],
|
|
'Crystal Reports' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
'CSON' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Cucumber' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
],
|
|
'D/dtrace' => [ [ 'die' , ], ], # never called
|
|
'D' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'rm_comments_in_strings', '"', '/+', '+/' ],
|
|
[ 'remove_between_general', '/+', '+/' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'DAL' => [
|
|
[ 'remove_between_general', '[', ']', ],
|
|
],
|
|
'Dart' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'DenizenScript' => [ # same as YAML
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Derw' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_between_general', '{-', '-}' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'Dafny' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'dhall' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
'Delphi Form' => [ # same as Pascal
|
|
[ 'remove_between_regex', '\{[^$]', '}' ],
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
'DIET' => [ # same as Pug
|
|
[ 'remove_pug_block' , ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
# diff is kind of weird: anything but a space in the first column
|
|
# will count as code, with the exception of #, ---, +++. Spaces
|
|
# in the first column denote context lines which aren't part of the
|
|
# difference.
|
|
'diff' => [
|
|
[ 'remove_matches' , '^#' ],
|
|
[ 'remove_matches' , '^\-\-\-' ],
|
|
[ 'remove_matches' , '^\+\+\+' ],
|
|
[ 'remove_matches' , '^\s' ],
|
|
],
|
|
'DITA' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'DOORS Extension Language' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Drools' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'dtrace' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'ECPP' => [
|
|
[ 'remove_between_general',
|
|
'<%doc>', '</%doc>', ],
|
|
[ 'remove_between_general',
|
|
'<#' , '#>' , ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'EEx' => [
|
|
[ 'remove_between_general', '<%#', '%>' ],
|
|
],
|
|
'EJS' => [
|
|
[ 'remove_between_general', '<%#', '%>' ],
|
|
[ 'remove_html_comments', ],
|
|
],
|
|
'Elm' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
'Embedded Crystal' => [
|
|
[ 'remove_between_general', '<%#', '%>' ],
|
|
],
|
|
'ERB' => [
|
|
[ 'remove_between_general', '<%#', '%>' ],
|
|
],
|
|
'Gencat NLS' => [ [ 'remove_matches' , '^\$ .*$' ], ],
|
|
'NASTRAN DMAP' => [
|
|
[ 'remove_matches' , '^\s*\$' ],
|
|
[ 'remove_inline' , '\$.*$' ],
|
|
],
|
|
'Dockerfile' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'DOS Batch' => [
|
|
[ 'remove_matches' , '^\s*rem' ],
|
|
[ 'remove_matches' , '^\s*::' ],
|
|
],
|
|
'DTD' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'Elixir' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'elixir_doc_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Erlang' => [
|
|
[ 'remove_matches' , '^\s*%' ],
|
|
[ 'remove_inline' , '%.*$' ],
|
|
],
|
|
'Expect' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Fennel' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'Finite State Language' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Fish Shell' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Focus' => [ [ 'remove_matches' , '^\s*\-\*' ], ],
|
|
'Forth' => [
|
|
[ 'remove_matches' , '^\s*\\\\.*$' ],
|
|
[ 'Forth_paren_to_C' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '\\\\.*$' ],
|
|
],
|
|
'Fortran 77' => [
|
|
[ 'remove_f77_comments' , ],
|
|
[ 'remove_inline' , '\!.*$' ],
|
|
],
|
|
'Fortran 77/Forth' => [ [ 'die' , ], ], # never called
|
|
'F#/Forth' => [ [ 'die' , ], ], # never called
|
|
'Fortran 90' => [
|
|
[ 'remove_f77_comments' , ],
|
|
[ 'remove_f90_comments' , ],
|
|
[ 'remove_inline' , '\!.*$' ],
|
|
],
|
|
'Fortran 95' => [
|
|
[ 'remove_f77_comments' , ],
|
|
[ 'remove_f90_comments' , ],
|
|
[ 'remove_inline' , '\!.*$' ],
|
|
],
|
|
'Freemarker Template' => [
|
|
[ 'remove_between_general', '<#--', '-->' ],
|
|
],
|
|
'Futhark' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
],
|
|
'FXML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'F#' => [
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
'F# Script' => [
|
|
[ 'call_regexp_common' , 'Pascal' ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
'Flatbuffers' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Godot Scene' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
],
|
|
'Godot Resource' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
],
|
|
'Godot Shaders' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'GDScript' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Glade' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Gleam' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Glimmer JavaScript' => [
|
|
[ 'remove_between_general', '{{!', '}}' ],
|
|
[ 'remove_between_general', '<!--', '-->' ],
|
|
[ 'rm_comments_in_strings', "'", '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', "'", '//', '' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Glimmer TypeScript' => [
|
|
[ 'remove_between_general', '{{!', '}}' ],
|
|
[ 'remove_between_general', '<!--', '-->' ],
|
|
[ 'rm_comments_in_strings', "'", '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', "'", '//', '' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'GLSL' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Go' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '["`]', '*/*', '' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Gradle' => [ # same as Groovy
|
|
[ 'remove_inline' , '//.*$' ],
|
|
# separate /* inside quoted strings with two
|
|
# concatenated strings split between / and *
|
|
[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
|
|
'(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
|
|
[ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
|
|
[ 'rm_comments_in_strings', '"""', '//', '', 1],
|
|
[ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
|
|
[ 'rm_comments_in_strings', "'''", '//', '', 1],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Grails' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'remove_jsp_comments' , ],
|
|
[ 'add_newlines' , ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'GraphQL' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Groovy' => [
|
|
[ 'remove_inline' , '//.*$' ],
|
|
# separate /* inside quoted strings with two
|
|
# concatenated strings split between / and *
|
|
[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
|
|
'(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
|
|
[ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
|
|
[ 'rm_comments_in_strings', '"""', '//', '', 1],
|
|
[ 'rm_comments_in_strings', "'''", '/*', '*/', 1],
|
|
[ 'rm_comments_in_strings', "'''", '//', '', 1],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Haml' => [
|
|
[ 'remove_haml_block' , ],
|
|
[ 'remove_html_comments', ],
|
|
[ 'remove_matches' , '^\s*/\s*\S+' ],
|
|
[ 'remove_matches' , '^\s*-#\s*\S+' ],
|
|
],
|
|
'Handlebars' => [
|
|
[ 'remove_between_general', '{{!--', '--}}' ],
|
|
[ 'remove_between_general', '{{!', '}}' ],
|
|
[ 'remove_html_comments', ],
|
|
],
|
|
'Harbour' => [
|
|
[ 'remove_matches' , '^\s*\&\&' ],
|
|
[ 'remove_matches' , '^\s*\*' ],
|
|
[ 'remove_matches' , '^\s*NOTE' ],
|
|
[ 'remove_matches' , '^\s*note' ],
|
|
[ 'remove_matches' , '^\s*Note' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
[ 'remove_inline' , '\&\&.*$' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Hare' => [
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'remove_matches' , '//.*$' ],
|
|
],
|
|
'Haxe' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'HCL' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'HLSL' => [
|
|
[ 'remove_inline' , '//.*$' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'HTML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'HTML EEx' => [
|
|
[ 'remove_matches' , '^\s*<% #' ],
|
|
[ 'remove_between_general', '<%!--', '--%>' ],
|
|
],
|
|
'HolyC' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Hoon' => [
|
|
[ 'remove_matches' , '^\s*:[:><]' ],
|
|
[ 'remove_inline' , ':[:><].*$' ],
|
|
],
|
|
'INI' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
],
|
|
'XHTML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Haskell' => [ [ 'remove_haskell_comments', '>filename<' ], ],
|
|
'IDL' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
'IDL/Qt Project/Prolog/ProGuard' => [ [ 'die' , ], ], # never called
|
|
'Idris' => [
|
|
[ 'remove_haskell_comments', '>filename<' ],
|
|
[ 'remove_matches' , '^\s*\|{3}' ],
|
|
],
|
|
'Igor Pro' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Literate Idris' => [
|
|
[ 'remove_matches' , '^[^>]' ],
|
|
],
|
|
'Imba' => [
|
|
[ 'remove_matches' , '^\s*#\s'],
|
|
[ 'remove_inline' , '#\s.*$' ],
|
|
[ 'remove_between_regex', '###', '###' ],
|
|
],
|
|
'InstallShield' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'IPL' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Jai' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Jam' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Janet' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'JSP' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'remove_jsp_comments' , ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'add_newlines' , ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'JavaServer Faces' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Java' => [
|
|
[ 'docstring_rm_comments', ],
|
|
[ 'replace_regex', '\\\\$', ' '],
|
|
# Java seems to have more path globs in strings
|
|
# than other languages. The variations make
|
|
# it tricky to craft a universal fix.
|
|
# \1 is a backreference to the first group, meaning
|
|
# either single or double quote
|
|
[ 'replace_between_regex', '(["\'])(.*?/\*)(.*?)\1',
|
|
'(.*?)' , '"xx"'],
|
|
[ 'replace_between_regex', '(["\'])(.*?\*/)(.*?)\1',
|
|
'(.*?)' , '"xx"'],
|
|
## separate /* inside quoted strings with two
|
|
## concatenated strings split between / and *
|
|
## -> defeated by "xx/**/*_xx" issue 365
|
|
#[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
|
|
# '(.*?)' , '"$1$2$1 + $1$3$1$4"'],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'JavaScript' => [
|
|
[ 'rm_comments_in_strings', "'", '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', "'", '//', '' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Jinja Template' => [
|
|
[ 'remove_between_general', '{#', '#}' ],
|
|
],
|
|
'JSX' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'JCL' => [ [ 'remove_jcl_comments' , ], ],
|
|
'JSON' => [ # ECMA-404, the JSON standard definition
|
|
# makes no provision for JSON comments
|
|
# so just use a placeholder filter
|
|
[ 'remove_matches' , '^\s*$' ],
|
|
],
|
|
'JSON5' => [ # same as JavaScript
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Julia' => [
|
|
[ 'remove_between_general', '#=', '=#' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Juniper Junos' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'kvlang' => [
|
|
["remove_matches", '^\s*#[^:]'],
|
|
],
|
|
'Kotlin' => [
|
|
[ 'rm_comments_in_strings', '"""', '/*', '*/', 1],
|
|
[ 'rm_comments_in_strings', '"""', '//', '', 1],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Lean' => [
|
|
[ 'remove_between_general', '/-', '-/' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'Lem' => [
|
|
[ 'remove_OCaml_comments', ],
|
|
],
|
|
'LESS' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'LFE' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_between_general', '#|', '|#' ],
|
|
],
|
|
'Linker Script' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/'],
|
|
[ 'call_regexp_common', 'C' ],
|
|
],
|
|
'liquid' => [
|
|
[ 'remove_between_general', '{% comment %}',
|
|
'{% endcomment %}' ],
|
|
[ 'remove_html_comments', ],
|
|
],
|
|
'Lisp' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_between_general', '#|', '|#' ],
|
|
],
|
|
'Lisp/OpenCL' => [ [ 'die' , ], ], # never called
|
|
'Lisp/Julia' => [ [ 'die' , ], ], # never called
|
|
'LiveLink OScript' => [ [ 'remove_matches' , '^\s*//' ], ],
|
|
'LLVM IR' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'Logos' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Logtalk' => [ # same filters as Prolog
|
|
[ 'remove_matches' , '^\s*\%' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '(//|\%).*$' ],
|
|
],
|
|
'Lua' => [
|
|
[ 'remove_between_general', '--[=====[', ']=====]' ],
|
|
[ 'remove_between_general', '--[====[', ']====]' ],
|
|
[ 'remove_between_general', '--[===[', ']===]' ],
|
|
[ 'remove_between_general', '--[==[', ']==]' ],
|
|
[ 'remove_between_general', '--[=[', ']=]' ],
|
|
[ 'remove_between_general', '--[[', ']]' ],
|
|
[ 'remove_matches' , '^\s*\-\-' ],
|
|
],
|
|
'Luau' => [
|
|
[ 'remove_between_general', '--[=====[', ']=====]' ],
|
|
[ 'remove_between_general', '--[====[', ']====]' ],
|
|
[ 'remove_between_general', '--[===[', ']===]' ],
|
|
[ 'remove_between_general', '--[==[', ']==]' ],
|
|
[ 'remove_between_general', '--[=[', ']=]' ],
|
|
[ 'remove_between_general', '--[[', ']]' ],
|
|
[ 'remove_matches' , '^\s*\-\-' ],
|
|
],
|
|
'make' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Meson' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'MATLAB' => [
|
|
[ 'remove_between_general', '%{', '%}' ],
|
|
[ 'remove_matches' , '^\s*%' ],
|
|
[ 'remove_inline' , '%.*$' ],
|
|
],
|
|
'Mathematica' => [
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
],
|
|
'Maven/XML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Maven' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Mercury' => [
|
|
[ 'remove_inline' , '%.*$' ],
|
|
[ 'remove_matches' , '^\s*%' ],
|
|
],
|
|
'Metal' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Modelica' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Modula3' => [ [ 'call_regexp_common' , 'Pascal' ], ],
|
|
# Modula 3 comments are (* ... *) so applying the Pascal filter
|
|
# which also treats { ... } as a comment is not really correct.
|
|
'Mojom' => [ [ 'call_regexp_common' , 'C++' ], ],
|
|
'Mojo' => [
|
|
[ 'remove_matches' , '/\*' ],
|
|
[ 'remove_matches' , '\*/' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Nemerle' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Nunjucks' => [
|
|
[ 'remove_between_general', '{#', '#}' ],
|
|
],
|
|
'Objective-C' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Objective-C++' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'OCaml' => [
|
|
[ 'rm_comments_in_strings', '"', '(*', '*)', 1 ],
|
|
[ 'remove_OCaml_comments', ],
|
|
],
|
|
'OpenCL' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'PHP/Pascal/Fortran' => [ [ 'die' , ], ], # never called
|
|
'Mako' => [
|
|
[ 'remove_matches' , '##.*$' ],
|
|
],
|
|
'Markdown' => [
|
|
[ 'remove_between_general', '<!--', '-->' ],
|
|
[ 'remove_between_regex',
|
|
'\[(comment|\/\/)?\]\s*:?\s*(<\s*>|#)?\s*\(.*?', '.*?\)' ],
|
|
# http://stackoverflow.com/questions/4823468/comments-in-markdown
|
|
],
|
|
'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => [ [ 'die' , ], ], # never called
|
|
'MUMPS' => [ [ 'remove_matches' , '^\s*;' ], ],
|
|
'Mustache' => [
|
|
[ 'remove_between_general', '{{!', '}}' ],
|
|
],
|
|
'Nickel' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Nim' => [
|
|
[ 'remove_between_general', '#[', ']#' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
# [ 'docstring_to_C' ],
|
|
# [ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'NetLogo' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'Nix' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Octave' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Odin' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'OpenSCAD' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Oracle Forms' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
'Oracle Reports' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
'Oracle PL/SQL' => [
|
|
[ 'call_regexp_common' , 'PL/SQL' ],
|
|
],
|
|
'Pascal' => [
|
|
[ 'remove_between_regex', '\{[^$]', '}' ],
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
####'Pascal' => [
|
|
#### [ 'call_regexp_common' , 'Pascal' ],
|
|
#### [ 'remove_matches' , '^\s*//' ],
|
|
#### ],
|
|
'Pascal/Puppet' => [ [ 'die' , ], ], # never called
|
|
'Puppet' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'P4' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Patran Command Language'=> [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_matches' , '^\s*\$#' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'Perl' => [ [ 'remove_below' , '^__(END|DATA)__'],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_below_above' , '^=head1', '^=cut' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'PEG' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'peg.js' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'peggy' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Pest' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Perl/Prolog' => [ [ 'die' , ], ], # never called
|
|
'PL/I' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'PL/M' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'PlantUML' => [
|
|
[ 'remove_between_general', "/'", "'/" ],
|
|
[ 'remove_matches' , "^\\s*'" ],
|
|
],
|
|
'Pig Latin' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'Prisma Schema' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
'Processing' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'ProGuard' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'PO File' => [
|
|
[ 'remove_matches' , '^\s*#[^,]' ], # '#,' is not a comment
|
|
],
|
|
'Pony' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'PowerBuilder' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'PowerShell' => [
|
|
[ 'powershell_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Prolog' => [
|
|
[ 'remove_matches' , '^\s*\%' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '(//|\%).*$' ],
|
|
],
|
|
'Properties' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_matches' , '^\s*!' ],
|
|
],
|
|
'Protocol Buffers' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'PRQL' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
],
|
|
'Pug' => [
|
|
[ 'remove_pug_block' , ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'PureScript' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_between_general', '{-', '-}' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'Python' => [
|
|
[ 'remove_matches' , '/\*' ],
|
|
[ 'remove_matches' , '\*/' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Jupyter Notebook' => [ # these are JSON files; have no comments
|
|
# would have to parse JSON for
|
|
# "cell_type": "code"
|
|
# to count code lines
|
|
[ 'jupyter_nb' ],
|
|
[ 'remove_matches' , '^\s*$' ],
|
|
],
|
|
'PHP' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'QML' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'Qt' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Qt Linguist' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Qt Project' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'R' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Ring' => [
|
|
[ 'remove_inline' , '#.*$' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Rmd' => [
|
|
[ 'reduce_to_rmd_code_blocks' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Racket' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'Raku' => [ [ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_below_above' , '^=head1', '^=cut' ],
|
|
[ 'remove_below_above' , '^=begin', '^=end' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Raku/Prolog' => [ [ 'die' , ], ], # never called
|
|
'RAML' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'RapydScript' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Razor' => [
|
|
[ 'remove_between_general', '@*', '*@' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_between_general', '<!--', '-->' ],
|
|
],
|
|
'reStructuredText' => [
|
|
[ 'remove_between_regex', '^\.\.', '^[^ \n\t\r\f\.]' ]
|
|
],
|
|
'Rexx' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
'ReasonML' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'ReScript' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'RobotFramework' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_matches' , '^\s*Comment' ],
|
|
[ 'remove_matches' , '^\s*\*{3}\s+(Variables|Test\s+Cases|Settings|Keywords)\s+\*{3}' ] ,
|
|
[ 'remove_matches' , '^\s*\[(Documentation|Tags)\]' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Ruby' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_below_above' , '^=begin', '^=end' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Ruby HTML' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'Circom' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Cairo' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Rust' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'SaltStack' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'SAS' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_between_general', '*', ';' ],
|
|
],
|
|
'Sass' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Scala' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Scheme/SaltStack' => [ [ 'die' , ], ], # never called
|
|
'Scheme' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'SCSS' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'sed' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Slice' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Slim' => [
|
|
[ 'remove_slim_block' , ],
|
|
],
|
|
'Slint' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'SKILL' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
],
|
|
'SKILL++' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
],
|
|
'Squirrel' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Starlark' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'BizTalk Pipeline' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'BizTalk Orchestration' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Solidity' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'SparForte' => [
|
|
[ 'remove_matches' , '^\s*#!' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
],
|
|
'Specman e' => [
|
|
[ 'pre_post_fix' , "'>", "<'"],
|
|
[ 'remove_between_general', "^'>", "^<'" ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++', ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'rm_last_line' , ], # undo pre_post_fix addition
|
|
# of trailing line of just <'
|
|
],
|
|
'SQL' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'SQL Stored Procedure'=> [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'SQL Data' => [
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'Smalltalk' => [
|
|
[ 'call_regexp_common' , 'Smalltalk' ],
|
|
],
|
|
'Smarty' => [
|
|
[ 'smarty_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'Snakemake' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Standard ML' => [
|
|
[ 'remove_between_general', '(*', '*)' ],
|
|
],
|
|
'Stata' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Stylus' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'SugarSS' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Svelte' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'SVG' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Swift' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'SWIG' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
|
|
'm4' => [ [ 'remove_matches' , '^dnl\s' ], ],
|
|
'C Shell' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Kermit' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Korn Shell' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'TableGen' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Tcl/Tk' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'TEAL' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
],
|
|
'Teamcenter met' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
'Teamcenter mth' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
'TeX' => [
|
|
[ 'remove_matches' , '^\s*%' ],
|
|
[ 'remove_inline' , '%.*$' ],
|
|
],
|
|
'Text' => [
|
|
[ 'remove_matches' , '^\s*$' ],
|
|
],
|
|
'TLA+' => [
|
|
[ 'remove_TLAPlus_generated_code' ],
|
|
[ 'remove_matches' , '^\\s*\\\\\\*' ],
|
|
[ 'remove_TLAPlus_comments' ],
|
|
],
|
|
'Thrift' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Titanium Style Sheet' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
[ 'remove_between_regex', '/[^/]', '[^/]/' ],
|
|
],
|
|
'TNSDL' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'TOML' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'TTCN' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'TITAN Project File Information' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'tspeg' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Twig' => [
|
|
[ 'remove_between_general', '{#', '#}' ],
|
|
],
|
|
'TypeScript' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Typst' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Umka' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Unity-Prefab' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Visual Fox Pro' => [
|
|
[ 'remove_matches' , '^\s*\*' ],
|
|
[ 'remove_inline' , '\*.*$' ],
|
|
[ 'remove_matches' , '^\s*&&' ],
|
|
[ 'remove_inline' , '&&.*$' ],
|
|
],
|
|
'Softbridge Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
[ 'remove_matches' , '^\s*\47'], ], # \47 = '
|
|
# http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf
|
|
'Vala' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Vala Header' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Verilog-SystemVerilog/Coq' => [ ['die'] ], # never called
|
|
'Verilog-SystemVerilog' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'VHDL' => [
|
|
[ 'remove_matches' , '^\s*--' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_inline' , '--.*$' ],
|
|
],
|
|
'vim script' => [
|
|
[ 'remove_matches' , '^\s*"' ],
|
|
[ 'remove_inline' , '".*$' ],
|
|
],
|
|
'Visual Basic Script' => [
|
|
[ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
[ 'remove_matches' , '^\s*\47'], # \47 = '
|
|
],
|
|
'Visual Basic .NET' => [
|
|
[ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
[ 'remove_matches' , '^\s*\47'], # \47 = '
|
|
],
|
|
'VB for Applications' => [
|
|
[ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
[ 'remove_matches' , '^\s*\47'], # \47 = '
|
|
],
|
|
'Visual Basic' => [
|
|
[ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ],
|
|
[ 'remove_matches' , '^\s*Attribute\s+'],
|
|
[ 'remove_matches' , '^\s*\47'], # \47 = '
|
|
],
|
|
'Visualforce Component' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Visualforce Page' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Velocity Template Language' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'remove_jsp_comments' , ],
|
|
[ 'remove_matches' , '^\s*##' ],
|
|
[ 'remove_between_general', '#**', '*#' ],
|
|
[ 'add_newlines' , ],
|
|
],
|
|
'Vuejs Component' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Vyper' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'docstring_to_C' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'Teamcenter def' => [ [ 'remove_matches' , '^\s*#' ], ],
|
|
'Windows Module Definition' => [
|
|
[ 'remove_matches' , '^\s*;' ],
|
|
[ 'remove_inline' , ';.*$' ],
|
|
],
|
|
'yacc' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'YAML' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
'lex' => [ [ 'call_regexp_common' , 'C' ], ],
|
|
'XAML' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'xBase Header' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_matches' , '^\s*\&\&' ],
|
|
[ 'remove_matches' , '^\s*\*' ],
|
|
[ 'remove_matches' , '^\s*NOTE' ],
|
|
[ 'remove_matches' , '^\s*note' ],
|
|
[ 'remove_matches' , '^\s*Note' ],
|
|
[ 'remove_inline' , '\&\&.*$' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'xBase' => [
|
|
# [ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_matches' , '^\s*\&\&' ],
|
|
[ 'remove_matches' , '^\s*\*' ],
|
|
[ 'remove_matches' , '^\s*NOTE' ],
|
|
[ 'remove_matches' , '^\s*note' ],
|
|
[ 'remove_matches' , '^\s*Note' ],
|
|
[ 'remove_inline' , '\&\&.*$' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'MXML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'add_newlines' , ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'Web Services Description' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'WebAssembly' => [
|
|
[ 'remove_matches' , '^\s*;;' ],
|
|
],
|
|
'WGSL' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Windows Message File' => [
|
|
[ 'remove_matches' , '^\s*;\s*//' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
[ 'remove_matches' , '^\s*;\s*$' ],
|
|
# next line only hypothetical
|
|
# [ 'remove_matches_2re' , '^\s*;\s*/\*',
|
|
# '^\s*;\s*\*/', ],
|
|
],
|
|
'Windows Resource File' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'WiX source' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'WiX include' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'WiX string localization' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'WXML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'WXSS' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C' ],
|
|
],
|
|
'X++' => [
|
|
[ 'remove_matches', '\s*#\s*//' ],
|
|
[ 'remove_between_regex', '#\s*/\*', '\*/' ],
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'XMI' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'XML' => [
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'XQuery' => [
|
|
[ 'remove_between_general', '(:', ':)' ],
|
|
],
|
|
'XSD' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'XSLT' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'Xtend' => [ # copy of Java, plus triple << inline
|
|
# separate /* inside quoted strings with two
|
|
# concatenated strings split between / and *
|
|
[ 'replace_between_regex', '(["\'])(.*?/)(\*.*?)\1',
|
|
'(.*?)' , '"$1$2$1 + $1$3$1$4"', 0],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
[ 'remove_matches' , '^\s*\x{c2ab}{3}' ], # doesn't work
|
|
# \xC2AB is the UTF-8 byte sequence for the Unicode << character («, U+00AB)
|
|
],
|
|
'NAnt script' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'MSBuild script' => [ [ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ], ],
|
|
'Visual Studio Module' => [
|
|
[ 'rm_comments_in_strings', '"', '/*', '*/' ],
|
|
[ 'rm_comments_in_strings', '"', '//', '' ],
|
|
[ 'call_regexp_common' , 'C++' ],
|
|
],
|
|
'Visual Studio Solution' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_html_comments', ],
|
|
[ 'call_regexp_common' , 'HTML' ],
|
|
],
|
|
'Zig' => [
|
|
[ 'remove_matches' , '^\s*//' ],
|
|
[ 'remove_inline' , '//.*$' ],
|
|
],
|
|
'zsh' => [
|
|
[ 'remove_matches' , '^\s*#' ],
|
|
[ 'remove_inline' , '#.*$' ],
|
|
],
|
|
);
|
|
# 1}}}
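# Illustration only (nothing below is executed): each entry above is an
# ordered pipeline of [routine, arguments...] filters that strip comments
# from a file's lines before they are counted. Taking the 'PHP' entry as a
# worked example, a line such as
#     $y = "/* not a comment */"; // trailing note
# is expected to be processed roughly as follows: rm_comments_in_strings
# neutralizes the /* and */ that sit inside the double-quoted string so
# they are not later mistaken for a real comment, call_regexp_common('C++')
# then strips the real // comment via Regexp::Common, and remove_inline
# drops any trailing '# ...' text, leaving the code portion of the line to
# be counted.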
|
|
%{$rh_EOL_continuation_re} = ( # {{{1
|
|
'ActionScript' => '\\\\$' ,
|
|
'AspectJ' => '\\\\$' ,
|
|
'Assembly' => '\\\\$' ,
|
|
'ASP' => '\\\\$' ,
|
|
'ASP.NET' => '\\\\$' ,
|
|
'Ada' => '\\\\$' ,
|
|
'awk' => '\\\\$' ,
|
|
'bc' => '\\\\$' ,
|
|
'C' => '\\\\$' ,
|
|
'C++' => '\\\\$' ,
|
|
'C/C++ Header' => '\\\\$' ,
|
|
'CMake' => '\\\\$' ,
|
|
'Cython' => '\\\\$' ,
|
|
'C#' => '\\\\$' ,
|
|
'C# Designer' => '\\\\$' ,
|
|
'Cake Build Script' => '\\\\$' ,
|
|
'D' => '\\\\$' ,
|
|
'Dart' => '\\\\$' ,
|
|
'Expect' => '\\\\$' ,
|
|
'Futhark' => '\\\\$' ,
|
|
'Gencat NLS' => '\\\\$' ,
|
|
'Go' => '\\\\$' ,
|
|
'IDL' => '\$\\$' ,
|
|
'Igor Pro' => '\\$' ,
|
|
# 'Java' => '\\\\$' ,
|
|
'JavaScript' => '\\\\$' ,
|
|
'JSON5' => '\\\\$' ,
|
|
'JSX' => '\\\\$' ,
|
|
'LESS' => '\\\\$' ,
|
|
'Lua' => '\\\\$' ,
|
|
'Luau' => '\\\\$' ,
|
|
'make' => '\\\\$' ,
|
|
'MATLAB' => '\.\.\.\s*$' ,
|
|
'Meson' => '\\\\$' ,
|
|
'Metal' => '\\\\$' ,
|
|
'MXML' => '\\\\$' ,
|
|
'Objective-C' => '\\\\$' ,
|
|
'Objective-C++' => '\\\\$' ,
|
|
'OCaml' => '\\\\$' ,
|
|
'Octave' => '\.\.\.\s*$' ,
|
|
'Qt Project' => '\\\\$' ,
|
|
'Patran Command Language'=> '\\\\$' ,
|
|
'PowerBuilder' => '\\\\$' ,
|
|
'PowerShell' => '\\\\$' ,
|
|
'Python' => '\\\\$' ,
|
|
'R' => '\\\\$' ,
|
|
'Rmd' => '\\\\$' ,
|
|
'Ruby' => '\\\\$' ,
|
|
'sed' => '\\\\$' ,
|
|
'Swift' => '\\\\$' ,
|
|
'Bourne Again Shell' => '\\\\$' ,
|
|
'Bourne Shell' => '\\\\$' ,
|
|
'C Shell' => '\\\\$' ,
|
|
'kvlang' => '\\\\$' ,
|
|
'Kermit' => '\\\\$' ,
|
|
'Korn Shell' => '\\\\$' ,
|
|
'Slint' => '\\\\$' ,
|
|
'Snakemake' => '\\\\$' ,
|
|
'Starlark' => '\\\\$' ,
|
|
'Solidity' => '\\\\$' ,
|
|
'Stata' => '///$' ,
|
|
'Stylus' => '\\\\$' ,
|
|
'Tcl/Tk' => '\\\\$' ,
|
|
'TTCN' => '\\\\$' ,
|
|
'TypeScript' => '\\\\$' ,
|
|
'lex' => '\\\\$' ,
|
|
'Vala' => '\\\\$' ,
|
|
'Vala Header' => '\\\\$' ,
|
|
'Vyper' => '\\\\$' ,
|
|
'X++' => '\\\\$' ,
|
|
'zsh' => '\\\\$' ,
|
|
);
|
|
# 1}}}
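# A note on the escaping above (illustration only): the values are
# single-quoted Perl strings that are later used as regular expressions,
# so '\\\\$' is the three-character pattern \\$ which matches a single
# literal backslash at end of line, the usual continuation marker. The
# exceptions spell out their own syntax, e.g. MATLAB and Octave continue a
# line with '...' and Stata with '///'.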
|
|
%{$rh_Not_Code_Extension} = ( # {{{1
|
|
'1' => 1, # Man pages (documentation):
|
|
'2' => 1,
|
|
'3' => 1,
|
|
'4' => 1,
|
|
'5' => 1,
|
|
'6' => 1,
|
|
'7' => 1,
|
|
'8' => 1,
|
|
'9' => 1,
|
|
'a' => 1, # Static object code.
|
|
'ad' => 1, # X application default resource file.
|
|
'afm' => 1, # font metrics
|
|
'arc' => 1, # arc(1) archive
|
|
'arj' => 1, # arj(1) archive
|
|
'au' => 1, # Audio sound file
|
|
'bak' => 1, # Backup files - we only want to count the "real" files.
|
|
'bdf' => 1,
|
|
'bmp' => 1,
|
|
'bz2' => 1, # bzip2(1) compressed file
|
|
'csv' => 1,
|
|
'desktop' => 1,
|
|
'dic' => 1,
|
|
'doc' => 1,
|
|
'elc' => 1,
|
|
'eps' => 1,
|
|
'fig' => 1,
|
|
'gif' => 1,
|
|
'gz' => 1,
|
|
'h5' => 1, # hierarchical data format
|
|
'hdf' => 1, # hierarchical data format
|
|
'in' => 1, # Debatable.
|
|
'jpg' => 1,
|
|
'kdelnk' => 1,
|
|
'man' => 1,
|
|
'mf' => 1,
|
|
'mp3' => 1,
|
|
'n' => 1,
|
|
'o' => 1, # Object code is generated from source code.
|
|
'o.cmd' => 1, # not DOS Batch; Linux kernel compilation optimization file
|
|
'o.d' => 1, # cmake object dependency file
|
|
'pbm' => 1,
|
|
'pdf' => 1,
|
|
'pfb' => 1,
|
|
'png' => 1,
|
|
'ppt' => 1,
|
|
'pptx' => 1,
|
|
'ps' => 1, # Postscript is _USUALLY_ generated automatically.
|
|
'sgm' => 1,
|
|
'sgml' => 1,
|
|
'so' => 1, # Dynamically-loaded object code.
|
|
'Tag' => 1,
|
|
'tfm' => 1,
|
|
'tgz' => 1, # gzipped tarball
|
|
'tiff' => 1,
|
|
'tsv' => 1, # tab separated values
|
|
'vf' => 1,
|
|
'wav' => 1,
|
|
'xbm' => 1,
|
|
'xls' => 1,
|
|
'xlsx' => 1,
|
|
'xpm' => 1,
|
|
'Y' => 1, # file compressed with "Yabba"
|
|
'Z' => 1, # file compressed with "compress"
|
|
'zip' => 1, # zip archive
|
|
'gitattributes' => 1,
|
|
'gitignore' => 1,
|
|
'gitmodules' => 1,
|
|
); # 1}}}
|
|
%{$rh_Not_Code_Filename} = ( # {{{1
|
|
'AUTHORS' => 1,
'BUGS' => 1,
'Changelog' => 1,
'ChangeLog' => 1,
'Changes' => 1,
'CHANGES' => 1,
'COPYING' => 1,
'DESCRIPTION' => 1, # R packages metafile
'.cvsignore' => 1,
'Entries' => 1,
'FAQ' => 1,
'INSTALL' => 1,
'MAINTAINERS' => 1,
'MD5SUMS' => 1,
'NAMESPACE' => 1, # R packages metafile
'NEWS' => 1,
'readme' => 1,
'Readme' => 1,
'README' => 1,
'README.tk' => 1, # used in kdemultimedia, it's confusing.
'Repository' => 1,
'Root' => 1, # CVS
'TODO' => 1,
);
|
|
# 1}}}
|
|
%{$rh_Scale_Factor} = ( # {{{1
|
|
'(unknown)' => 0.00,
|
|
'1st generation default' => 0.25,
|
|
'2nd generation default' => 0.75,
|
|
'3rd generation default' => 1.00,
|
|
'4th generation default' => 4.00,
|
|
'5th generation default' => 16.00,
|
|
'ABAP' => 5.00,
|
|
'ActionScript' => 1.36,
|
|
'Ada' => 0.52,
|
|
'ADSO/IDSM' => 3.00,
|
|
'Agda' => 2.11,
|
|
'AMPLE' => 2.00,
|
|
'AnsProlog' => 1.25,
|
|
'Ant/XML' => 1.90,
|
|
'Ant' => 1.90,
|
|
'ANTLR Grammar' => 2.00,
|
|
'SQL' => 6.15,
|
|
'SQL Stored Procedure' => 6.15,
|
|
'SQL Data' => 1.00,
|
|
'Apex Class' => 1.50,
|
|
'APL' => 2.50,
|
|
'ArkTs' => 2.50,
|
|
'Arturo' => 4.00,
|
|
'AsciiDoc' => 1.50,
|
|
'AspectJ' => 1.36,
|
|
'asa' => 1.29,
|
|
'ASP' => 1.29,
|
|
'ASP.NET' => 1.29,
|
|
'CCS' => 5.33,
|
|
'Apex Trigger' => 1.4 ,
|
|
'AppleScript' => 4.0 ,
|
|
'Arduino Sketch' => 1.00,
|
|
'Assembly' => 0.25,
|
|
'Astro' => 3.0,
|
|
'Asymptote' => 2.50,
|
|
'autocoder' => 0.25,
|
|
'AutoHotkey' => 1.29,
|
|
'awk' => 3.81,
|
|
'basic' => 0.75,
|
|
'Bazel' => 1.00,
|
|
'bc' => 1.50,
|
|
'Blade' => 2.00,
|
|
'bliss' => 0.75,
|
|
'bmsgen' => 2.22,
|
|
'bteq' => 6.15,
|
|
'Brainfuck' => 0.10,
|
|
'BrightScript' => 2.00,
|
|
'builder' => 2.00,
|
|
'C' => 0.77,
|
|
'c set 2' => 0.88,
|
|
'C#' => 1.36,
|
|
'C# Designer' => 1.36,
|
|
'C# Generated' => 1.36,
|
|
'Cake Build Script' => 1.36,
|
|
'C++' => 1.51,
|
|
'Carbon' => 1.51,
|
|
'ColdFusion' => 4.00,
|
|
'ColdFusion CFScript' => 4.00,
|
|
'Chapel' => 2.96,
|
|
'Clean' => 2.50,
|
|
'Clojure' => 1.25,
|
|
'ClojureScript' => 1.25,
|
|
'ClojureC' => 1.25,
|
|
'CMake' => 1.00,
|
|
'COBOL' => 1.04,
|
|
'CoCoA 5' => 1.04,
|
|
'CoffeeScript' => 2.00,
|
|
'Constraint Grammar' => 4.00,
|
|
'Containerfile' => 2.00,
|
|
'Coq' => 5.00,
|
|
'Crystal' => 2.50,
|
|
'Crystal Reports' => 4.00,
|
|
'csl' => 1.63,
|
|
'CSON' => 2.50,
|
|
'csp' => 1.51,
|
|
'cssl' => 1.74,
|
|
'CSS' => 1.0,
|
|
'CSV' => 0.1,
|
|
'Cucumber' => 3.00,
|
|
'CUDA' => 1.00,
|
|
'D' => 1.70,
|
|
'Dafny' => 3.00,
|
|
'DAL' => 1.50,
|
|
'Dart' => 2.00,
|
|
'DenizenScript' => 1.00,
|
|
'Delphi Form' => 2.00,
|
|
'DIET' => 2.00,
|
|
'diff' => 1.00,
|
|
'Derw' => 3.00,
|
|
'dhall' => 2.11,
|
|
'DITA' => 1.90,
|
|
'dtrace' => 2.00,
|
|
'NASTRAN DMAP' => 2.35,
|
|
'DOORS Extension Language' => 1.50,
|
|
'Dockerfile' => 2.00,
|
|
'DOS Batch' => 0.63,
|
|
'Drools' => 2.00,
|
|
'ECPP' => 1.90,
|
|
'eda/sql' => 6.67,
|
|
'edscheme 3.4' => 1.51,
|
|
'EEx' => 2.00,
|
|
'EJS' => 2.50,
|
|
'Elixir' => 2.11,
|
|
'Elm' => 2.50,
|
|
'Embedded Crystal' => 2.00,
|
|
'ERB' => 2.00,
|
|
'Erlang' => 2.11,
|
|
'Fennel' => 2.50,
|
|
'Finite State Language' => 2.00,
|
|
'Focus' => 1.90,
|
|
'Forth' => 1.25,
|
|
'Fortran 66' => 0.63,
|
|
'Fortran 77' => 0.75,
|
|
'Fortran 90' => 1.00,
|
|
'Fortran 95' => 1.13,
|
|
'Fortran II' => 0.63,
|
|
'foundation' => 2.76,
|
|
'Freemarker Template' => 1.48,
|
|
'Futhark' => 3.00, # Guessed from value of ML
|
|
'F#' => 2.50,
|
|
'F# Script' => 2.50,
|
|
'Flatbuffers' => 2.50,
|
|
'Glade' => 2.00,
|
|
'Gleam' => 2.50,
|
|
'GLSL' => 2.00,
|
|
'Glimmer JavaScript' => 3.50,
|
|
'Glimmer TypeScript' => 3.50,
|
|
'gml' => 1.74,
|
|
'gpss' => 1.74,
|
|
'guest' => 2.86,
|
|
'guru' => 1.63,
|
|
'GDScript' => 2.50,
|
|
'Godot Scene' => 2.50,
|
|
'Godot Resource' => 2.50,
|
|
'Godot Shaders' => 2.50,
|
|
'Go' => 2.50,
|
|
'Gradle' => 4.00,
|
|
'Grails' => 1.48,
|
|
'GraphQL' => 4.00,
|
|
'Groovy' => 4.10,
|
|
'gw basic' => 0.82,
|
|
'HCL' => 2.50,
|
|
'high c' => 0.63,
|
|
'hlevel' => 1.38,
|
|
'hp basic' => 0.63,
|
|
'Haml' => 2.50,
|
|
'Handlebars' => 2.50,
|
|
'Harbour' => 2.00,
|
|
'Hare' => 2.50,
|
|
'Haskell' => 2.11,
|
|
'Haxe' => 2.00,
|
|
'HolyC' => 2.50,
|
|
'Hoon' => 2.00,
|
|
'HTML' => 1.90,
|
|
'HTML EEx' => 3.00,
|
|
'XHTML' => 1.90,
|
|
'XMI' => 1.90,
|
|
'XML' => 1.90,
|
|
'FXML' => 1.90,
|
|
'MXML' => 1.90,
|
|
'XSLT' => 1.90,
|
|
'DTD' => 1.90,
|
|
'XSD' => 1.90,
|
|
'NAnt script' => 1.90,
|
|
'MSBuild script' => 1.90,
|
|
'Visual Studio Module' => 1.00,
|
|
'Visual Studio Solution' => 1.00,
|
|
'HLSL' => 2.00,
|
|
'Idris' => 2.00,
|
|
'Literate Idris' => 2.00,
|
|
'Igor Pro' => 4.00,
|
|
'Imba' => 3.00,
|
|
'INI' => 1.00,
|
|
'InstallShield' => 1.90,
|
|
'IPL' => 2.00,
|
|
'Jai' => 1.13,
|
|
'Jam' => 2.00,
|
|
'Janet' => 3.00,
|
|
'Java' => 1.36,
|
|
'JavaScript' => 1.48,
|
|
'JavaServer Faces' => 1.5 ,
|
|
'Jinja Template' => 1.5 ,
|
|
'JSON' => 2.50,
|
|
'JSON5' => 2.50,
|
|
'JSP' => 1.48,
|
|
'JSX' => 1.48,
|
|
'Velocity Template Language' => 1.00,
|
|
'JCL' => 1.67,
|
|
'Juniper Junos' => 2.00,
|
|
'kvlang' => 2.00,
|
|
'Kermit' => 2.00,
|
|
'Korn Shell' => 3.81,
|
|
'Kotlin' => 2.00,
|
|
'Lean' => 3.00,
|
|
'LESS' => 1.50,
|
|
'Lem' => 3.00,
|
|
'LFE' => 1.25,
|
|
'Linker Script' => 1.00,
|
|
'liquid' => 3.00,
|
|
'Lisp' => 1.25,
|
|
'LiveLink OScript' => 3.5 ,
|
|
'LLVM IR' => 0.90,
|
|
'Logos' => 2.00,
|
|
'Logtalk' => 2.00,
|
|
'm4' => 1.00,
|
|
'make' => 2.50,
|
|
'Mako' => 1.50,
|
|
'Markdown' => 1.00,
|
|
'mathcad' => 16.00,
|
|
'Maven' => 1.90,
|
|
'Meson' => 1.00,
|
|
'Metal' => 1.51,
|
|
'Modelica' => 2.00,
|
|
'MUMPS' => 4.21,
|
|
'Mustache' => 1.75,
|
|
'Nastran' => 1.13,
|
|
'Nemerle' => 2.50,
|
|
'NetLogo' => 4.00,
|
|
'Nickel' => 2.00,
|
|
'Nim' => 2.00,
|
|
'Nix' => 2.70,
|
|
'Nunjucks' => 1.5 ,
|
|
'Objective-C' => 2.96,
|
|
'Objective-C++' => 2.96,
|
|
'OCaml' => 3.00,
|
|
'Odin' => 2.00,
|
|
'OpenSCAD' => 1.00,
|
|
'Oracle Reports' => 2.76,
|
|
'Oracle Forms' => 2.67,
|
|
'Oracle Developer/2000' => 3.48,
|
|
'Other' => 1.00,
|
|
'P4' => 1.5 ,
|
|
'Pascal' => 0.88,
|
|
'Patran Command Language' => 2.50,
|
|
'Perl' => 4.00,
|
|
'PEG' => 3.00,
|
|
'peg.js' => 3.00,
|
|
'peggy' => 3.00,
|
|
'Pest' => 2.00,
|
|
'tspeg' => 3.00,
|
|
'Pig Latin' => 1.00,
|
|
'PL/I' => 1.38,
|
|
'PL/M' => 1.13,
|
|
'PlantUML' => 2.00,
|
|
'Oracle PL/SQL' => 2.58,
|
|
'PO File' => 1.50,
|
|
'Pony' => 3.00,
|
|
'PowerBuilder' => 3.33,
|
|
'PowerShell' => 3.00,
|
|
'Prisma Schema' => 2.50,
|
|
'Processing' => 2.50,
|
|
'ProGuard' => 2.50,
|
|
'Prolog' => 1.25,
|
|
'Properties' => 1.36,
|
|
'Protocol Buffers' => 2.00,
|
|
'PRQL' => 3.00,
|
|
'Pug' => 2.00,
|
|
'Puppet' => 2.00,
|
|
'PureScript' => 2.00,
|
|
'QML' => 1.25,
|
|
'Qt' => 2.00,
|
|
'Qt Linguist' => 1.00,
|
|
'Qt Project' => 1.00,
|
|
'R' => 3.00,
|
|
'Rmd' => 3.00,
|
|
'Racket' => 1.50,
|
|
'Raku' => 4.00,
|
|
'rally' => 2.00,
|
|
'ramis ii' => 2.00,
|
|
'RAML' => 0.90,
|
|
'ReasonML' => 2.50,
|
|
'ReScript' => 2.50,
|
|
'reStructuredText' => 1.50,
|
|
'Razor' => 2.00,
|
|
'Rexx' => 1.19,
|
|
'Ring' => 4.20,
|
|
'RobotFramework' => 2.50,
|
|
'Circom' => 1.00,
|
|
'Cairo' => 1.00,
|
|
'Rust' => 1.00,
|
|
'sas' => 1.95,
|
|
'Scala' => 4.10,
|
|
'Scheme' => 1.51,
|
|
'Slim' => 3.00,
|
|
'Solidity' => 1.48,
|
|
'Bourne Shell' => 3.81,
|
|
'Bourne Again Shell' => 3.81,
|
|
'ksh' => 3.81,
|
|
'zsh' => 3.81,
|
|
'Fish Shell' => 3.81,
|
|
'C Shell' => 3.81,
|
|
'SaltStack' => 2.00,
|
|
'SAS' => 1.5 ,
|
|
'Sass' => 1.5 ,
|
|
'SCSS' => 1.5 ,
|
|
'SKILL' => 2.00,
|
|
'SKILL++' => 2.00,
|
|
'Slice' => 1.50,
|
|
'Slint' => 1.00,
|
|
'Smalltalk' => 4.00,
|
|
'Smarty' => 3.50,
|
|
'Softbridge Basic' => 2.76,
|
|
'SparForte' => 3.80,
|
|
'sps' => 0.25,
|
|
'spss' => 2.50,
|
|
'Specman e' => 2.00,
|
|
'SQL' => 2.29,
|
|
'Squirrel' => 2.50,
|
|
'Standard ML' => 3.00,
|
|
'Stata' => 3.00,
|
|
'Stylus' => 1.48,
|
|
'SugarSS' => 2.50,
|
|
'Svelte' => 2.00,
|
|
'SVG' => 1.00,
|
|
'Swift' => 2.50,
|
|
'SWIG' => 2.50,
|
|
'TableGen' => 2.00,
|
|
'Tcl/Tk' => 4.00,
|
|
'TEAL' => 0.50,
|
|
'Teamcenter def' => 1.00,
|
|
'Teamcenter met' => 1.00,
|
|
'Teamcenter mth' => 1.00,
|
|
'TeX' => 1.50,
|
|
'Text' => 0.50,
|
|
'Thrift' => 2.50,
|
|
'TLA+' => 1.00,
|
|
'Titanium Style Sheet' => 2.00,
|
|
'TOML' => 2.76,
|
|
'Twig' => 2.00,
|
|
'TNSDL' => 2.00,
|
|
'TTCN' => 2.00,
|
|
'TITAN Project File Information' => 1.90,
|
|
'TypeScript' => 2.00,
|
|
'Typst' => 3.00,
|
|
'Umka' => 2.00,
|
|
'Unity-Prefab' => 2.50,
|
|
'Vala' => 1.50,
|
|
'Vala Header' => 1.40,
|
|
'Verilog-SystemVerilog' => 1.51,
|
|
'VHDL' => 4.21,
|
|
'vim script' => 3.00,
|
|
'Visual Basic' => 2.76,
|
|
'VB for Applications' => 2.76,
|
|
'Visual Basic .NET' => 2.76,
|
|
'Visual Basic Script' => 2.76,
|
|
'Visual Fox Pro' => 4.00, # Visual Fox Pro is not available in the language gearing ratios listed at Mayes Consulting web site
|
|
'Visualforce Component' => 1.9 ,
|
|
'Visualforce Page' => 1.9 ,
|
|
'Vuejs Component' => 2.00,
|
|
'Vyper' => 4.20,
|
|
'Web Services Description' => 1.00,
|
|
'WebAssembly' => 0.45,
|
|
'WGSL' => 2.50,
|
|
'Windows Message File' => 1.00,
|
|
'Windows Resource File' => 1.00,
|
|
'Windows Module Definition' => 1.00,
|
|
'WiX source' => 1.90,
|
|
'WiX include' => 1.90,
|
|
'WiX string localization' => 1.90,
|
|
'WXML' => 1.90,
|
|
'WXSS' => 1.00,
|
|
'xBase' => 2.00,
|
|
'xBase Header' => 2.00,
|
|
'xlisp' => 1.25,
|
|
'X++' => 1.51, # This is a guess. Copied from C++, because the overhead for C++ headers might be equivalent to the overhead of structuring elements in XPO files
|
|
'XAML' => 1.90,
|
|
'XQuery' => 2.50,
|
|
'yacc' => 1.51,
|
|
'yacc++' => 1.51,
|
|
'YAML' => 0.90,
|
|
'Expect' => 2.00,
|
|
'Gencat NLS' => 1.50,
|
|
'C/C++ Header' => 1.00,
|
|
'inc' => 1.00,
|
|
'lex' => 1.00,
|
|
'Julia' => 4.00,
|
|
'MATLAB' => 4.00,
|
|
'Mathematica' => 5.00,
|
|
'Mercury' => 3.00,
|
|
'Maven/XML' => 2.5,
|
|
'IDL' => 3.80,
|
|
'Octave' => 4.00,
|
|
'ML' => 3.00,
|
|
'Modula3' => 2.00,
|
|
'Mojom' => 2.00,
|
|
'Mojo' => 4.20,
|
|
'PHP' => 3.50,
|
|
'Jupyter Notebook' => 4.20,
|
|
'Python' => 4.20,
|
|
'Snakemake' => 4.20,
|
|
'RapydScript' => 4.20,
|
|
'Starlark' => 4.20,
|
|
'BizTalk Pipeline' => 1.00,
|
|
'BizTalk Orchestration' => 1.00,
|
|
'Cython' => 3.80,
|
|
'Ruby' => 4.20,
|
|
'Ruby HTML' => 4.00,
|
|
'sed' => 4.00,
|
|
'Lua' => 4.00,
|
|
'Luau' => 4.50,
|
|
'OpenCL' => 1.50,
|
|
'Xtend' => 2.00,
|
|
'Zig' => 2.50,
|
|
# aggregates; value is meaningless
|
|
'C#/Smalltalk' => 1.00,
|
|
'D/dtrace' => 1.00,
|
|
'F#/Forth' => 1.00,
|
|
'Fortran 77/Forth' => 1.00,
|
|
'Lisp/Julia' => 1.00,
|
|
'Lisp/OpenCL' => 1.00,
|
|
'PHP/Pascal/Fortran' => 1.00,
|
|
'Pascal/Puppet' => 1.00,
|
|
'Perl/Prolog' => 1.00,
|
|
'Raku/Prolog' => 1.00,
|
|
'Verilog-SystemVerilog/Coq' => 1.00,
|
|
'MATLAB/Mathematica/Objective-C/MUMPS/Mercury' => 1.00,
|
|
'IDL/Qt Project/Prolog/ProGuard' => 1.00,
|
|
);
|
|
# 1}}}
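# Note (assumption, for illustration only): judging by the generation
# labels above ('3rd generation default' => 1.00), these look like
# language gearing ratios, i.e. a scaled count would be
#     1000 lines of C    * 0.77 =  770 3rd-generation-equivalent lines
#     1000 lines of Perl * 4.00 = 4000 3rd-generation-equivalent lines
# so larger factors mark languages where each line carries more work.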
|
|
%{$rh_Known_Binary_Archives} = ( # {{{1
|
|
'.tar' => 1 ,
|
|
'.tar.Z' => 1 ,
|
|
'.tar.gz' => 1 ,
|
|
'.tar.bz2' => 1 ,
|
|
'.zip' => 1 ,
|
|
'.Zip' => 1 ,
|
|
'.ZIP' => 1 ,
|
|
'.ear' => 1 , # Java
|
|
'.war' => 1 , # contained within .ear
|
|
'.xz' => 1 ,
|
|
'.whl' => 1 , # Python wheel files (zip)
|
|
);
|
|
# 1}}}
|
|
} # end sub set_constants()
|
|
sub check_scale_existence { # {{{1
|
|
# do a few sanity checks
|
|
my ($rhaa_Filters_by_Language,
|
|
$rh_Language_by_Extension,
|
|
$rh_Scale_Factor) = @_;
|
|
|
|
my $OK = 1;
|
|
foreach my $language (sort keys %{$rhaa_Filters_by_Language}) {
|
|
next if defined $Extension_Collision{$language};
|
|
if (!defined $rh_Scale_Factor->{$language}) {
|
|
$OK = 0;
|
|
warn "Missing scale factor for $language\n";
|
|
}
|
|
}
|
|
|
|
my %seen_it = ();
|
|
foreach my $ext (sort keys %{$rh_Language_by_Extension}) {
|
|
my $language = $rh_Language_by_Extension->{$ext};
|
|
next if defined $Extension_Collision{$language};
|
|
next if $seen_it{$language};
|
|
if (!$rhaa_Filters_by_Language->{$language}) {
|
|
$OK = 0;
|
|
warn "Missing language filter for $language\n";
|
|
}
|
|
$seen_it{$language} = 1;
|
|
}
|
|
die unless $OK;
|
|
} # 1}}}
|
|
sub pre_post_fix { # {{{1
|
|
# Return the input lines prefixed and postfixed
|
|
# by the given strings.
|
|
my ($ra_lines, $prefix, $postfix ) = @_;
|
|
print "-> pre_post_fix with $prefix, $postfix\n" if $opt_v > 2;
|
|
|
|
my $all_lines = $prefix . join("" , @{$ra_lines}) . $postfix;
|
|
|
|
print "<- pre_post_fix\n" if $opt_v > 2;
|
|
return split("\n", $all_lines);
|
|
} # 1}}}
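# Worked example (illustration only): for the Specman e filter defined
# above,
#     pre_post_fix(["code;\n", "-- note\n"], "'>", "<'")
# concatenates everything into "'>code;\n-- note\n<'" and returns it split
# on newlines, i.e. ("'>code;", "-- note", "<'"); the extra trailing "<'"
# line is later removed again by rm_last_line.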
|
|
sub rm_last_line { # {{{1
|
|
# Return all but the last line.
|
|
my ($ra_lines, ) = @_;
|
|
print "-> rm_last_line\n" if $opt_v > 2;
|
|
print "<- rm_last_line\n" if $opt_v > 2;
|
|
my $n = scalar(@{$ra_lines}) - 2;
|
|
return @{$ra_lines}[0..$n];
|
|
} # 1}}}
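# Worked example (illustration only): with four input lines,
# $n = 4 - 2 = 2 and the slice @{$ra_lines}[0..2] returns the first three
# lines, i.e. everything except the last one.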
|
|
sub call_regexp_common { # {{{1
|
|
my ($ra_lines, $language ) = @_;
|
|
print "-> call_regexp_common for $language\n" if $opt_v > 2;
|
|
|
|
Install_Regexp_Common() unless $HAVE_Rexexp_Common;
|
|
|
|
my $all_lines = undef;
|
|
if ($language eq "C++") { # Regexp::Common's C++ comment regex is multi-line
|
|
# $all_lines = join("\n", @{$ra_lines});
|
|
$all_lines = "";
|
|
foreach (@{$ra_lines}) {
|
|
if (m/\\$/) { # line ends with a continuation marker
|
|
$all_lines .= $_;
|
|
} else {
|
|
$all_lines .= "$_\n";
|
|
}
|
|
}
|
|
} else {
|
|
$all_lines = join("" , @{$ra_lines});
|
|
}
|
|
|
|
no strict 'vars';
|
|
# otherwise get:
|
|
# Global symbol "%RE" requires explicit package name at cloc line xx.
|
|
if ($all_lines =~ $RE{comment}{$language}) {
|
|
# Suppress "Use of uninitialized value in regexp compilation" that
|
|
# pops up when $1 is undefined--happens if there's a bug in the $RE
|
|
# This Pascal comment will trigger it:
|
|
# (* This is { another } test. **)
|
|
# Curiously, testing for "defined $1" breaks the substitution.
|
|
no warnings;
|
|
# Remove comments.
|
|
$all_lines =~ s/$1//g;
|
|
}
|
|
# a bogus use of %RE to avoid:
|
|
# Name "main::RE" used only once: possible typo at cloc line xx.
|
|
print scalar keys %RE if $opt_v < -20;
|
|
print "[", join("]\n[", @{$ra_lines}), "]\n" if $opt_v > 4;
|
|
print "<- call_regexp_common\n" if $opt_v > 2;
|
|
return split("\n", $all_lines);
|
|
} # 1}}}
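# Worked example (illustration only): given lines such as
#     int a; /* scratch */
#     b++;   // counter
# call_regexp_common(\@lines, 'C++') is intended to return the same lines
# with the /* ... */ and // comments stripped by the Regexp::Common
# $RE{comment}{'C++'} pattern; for C++ the loop above first appends a line
# ending in a backslash directly onto the next line (no newline inserted)
# so the pattern sees the continued statement as one logical line.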
|
|
sub plural_form { # {{{1
|
|
# For getting the right plural form on some English nouns.
|
|
my $n = shift @_;
|
|
if ($n == 1) { return ( 1, "" ); }
|
|
else { return ($n, "s"); }
|
|
} # 1}}}
|
|
sub matlab_or_objective_C { # {{{1
|
|
# Decide if code is MATLAB, Mathematica, Objective-C, MUMPS, or Mercury
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rs_language , # out
|
|
) = @_;
|
|
print "-> matlab_or_objective_C\n" if $opt_v > 2;
|
|
# matlab markers:
|
|
# first line starts with "function"
|
|
# some lines start with "%"
|
|
# high marks for lines that start with [
|
|
#
|
|
# Objective-C markers:
|
|
# must have at least two brace characters, { }
|
|
# has /* ... */ style comments
|
|
# some lines start with @
|
|
# some lines start with #include
|
|
#
|
|
# MUMPS:
|
|
# has ; comment markers
|
|
# do not match: \w+\s*=\s*\w
|
|
# lines begin with \s*\.?\w+\s+\w
|
|
# high marks for lines that start with \s*K\s+ or \s*Kill\s+
|
|
#
|
|
# Mercury:
|
|
# any line that begins with :- immediately triggers this
|
|
#
|
|
# Mathematica:
|
|
# (* .. *)
|
|
# BeginPackage
|
|
|
|
${$rs_language} = "";
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return;
|
|
}
|
|
|
|
my $DEBUG = 0;
|
|
|
|
my $matlab_points = 0;
|
|
my $mathematica_points = 0;
|
|
my $objective_C_points = 0;
|
|
my $mumps_points = 0;
|
|
my $mercury_points = 0;
|
|
my $has_braces = 0;
|
|
while (<$IN>) {
|
|
++$has_braces if $_ =~ m/[{}]/;
|
|
#print "LINE $. has_braces=$has_braces\n";
|
|
++$mumps_points if $. == 1 and m{^[A-Z]};
|
|
if (m{^\s*/\*} or m {^\s*//}) { # /* or //
|
|
$objective_C_points += 5;
|
|
$matlab_points -= 5;
|
|
printf ".m: /*|// obj C=% 2d matlab=% 2d mathematica=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^:-\s+}) { # gotta be mercury
|
|
$mercury_points = 1000;
|
|
last;
|
|
} elsif (m{\w+\s*=\s*\[}) { # matrix assignment, very matlab
|
|
$matlab_points += 5;
|
|
}
|
|
if (m{\w+\[}) { # function call by []
|
|
$mathematica_points += 2;
|
|
printf ".m: \\w=[ obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\s*\w+\s*=\s*}) { # definitely not MUMPS
|
|
--$mumps_points;
|
|
printf ".m: \\w= obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) {
|
|
++$mumps_points;
|
|
printf ".m: \\w \\w obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\s*;}) {
|
|
++$mumps_points;
|
|
printf ".m: ; obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
}
|
|
if (m{^\s*#(include|import)}) {
|
|
# Objective-C without a doubt
|
|
$objective_C_points = 1000;
|
|
$matlab_points = 0;
|
|
printf ".m: #include obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
$has_braces = 2;
|
|
last;
|
|
} elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) {
|
|
# Objective-C without a doubt
|
|
$objective_C_points = 1000;
|
|
$matlab_points = 0;
|
|
printf ".m: keyword obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
last;
|
|
} elsif (m{^\s*BeginPackage}) {
|
|
$mathematica_points += 2;
|
|
} elsif (m{^\s*\[}) { # line starts with [ -- very matlab
|
|
$matlab_points += 5;
|
|
printf ".m: [ obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\sK(ill)?\s+}) {
|
|
$mumps_points += 5;
|
|
printf ".m: Kill obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\s*function}) {
|
|
--$objective_C_points;
|
|
++$matlab_points;
|
|
printf ".m: funct obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
} elsif (m{^\s*%}) { # %
|
|
# matlab commented line
|
|
--$objective_C_points;
|
|
++$matlab_points;
|
|
printf ".m: pcent obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
}
|
|
}
|
|
$IN->close;
|
|
printf "END LOOP obj C=% 2d matlab=% 2d mumps=% 2d mercury= % 2d\n", $objective_C_points, $matlab_points, $mathematica_points, $mumps_points, $mercury_points if $DEBUG;
|
|
|
|
# next heuristic is unreliable for small files
|
|
# $objective_C_points = -9.9e20 unless $has_braces >= 2;
|
|
|
|
my %points = ( 'MATLAB' => $matlab_points ,
|
|
'Mathematica' => $mathematica_points ,
|
|
'MUMPS' => $mumps_points ,
|
|
'Objective-C' => $objective_C_points,
|
|
'Mercury' => $mercury_points , );
|
|
|
|
${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
|
|
|
|
print "<- matlab_or_objective_C($file: matlab=$matlab_points, mathematica=$mathematica_points, C=$objective_C_points, mumps=$mumps_points, mercury=$mercury_points) => ${$rs_language}\n"
|
|
if $opt_v > 2;
|
|
|
|
} # 1}}}
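# Worked examples of the scoring above (illustration only):
#   - a file whose first code line is '#import <Foundation/Foundation.h>'
#     immediately scores objective_C_points = 1000 and is reported as
#     Objective-C;
#   - a file beginning with ':- module(foo).' sets mercury_points = 1000
#     and is reported as Mercury;
#   - lines like 'x = [1 2 3];' and '% comment' accumulate matlab_points
#     and steer the result toward MATLAB;
#   - 'BeginPackage' lines push the result toward Mathematica;
#   - on a tie the alphabetically first language name wins.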
|
|
sub php_pascal_or_fortran { # {{{1
|
|
# Decide if code is Fortran, PHP, or Pascal
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rs_language , # out
|
|
) = @_;
|
|
print "-> php_pascal_or_fortran\n" if $opt_v > 2;
|
|
# fortran markers:
|
|
# 'implicit none' or 'implicit real' or 'implicit integer'
|
|
# 'program' or 'subroutine' or 'function' or 'module'
|
|
# 'end program' or 'end subroutine' or 'end function' or 'end module'
|
|
# 'write(', 'enddo'
|
|
#
|
|
# PHP:
|
|
# must have at least two brace characters, { }
|
|
# has /* ... */ style comments
|
|
# some lines start with @
|
|
# some lines start with #include
|
|
#
|
|
# Pascal:
|
|
# lines ending with ;
|
|
# writeln
|
|
|
|
${$rs_language} = "";
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return;
|
|
}
|
|
|
|
my $DEBUG = 0;
|
|
|
|
my $fortran_90 = 0;
|
|
my $fortran_points = 0;
|
|
my $php_points = 0;
|
|
my $pascal_points = 0;
|
|
while (<$IN>) {
|
|
if (/^\s*<\?php/i) {
|
|
$php_points += 100;
|
|
last;
|
|
} elsif (/^\s*end\./i ) {
|
|
$pascal_points += 100;
|
|
last;
|
|
} elsif (/^\s*implicit\s+(none|real|integer|complex|double)/i) {
|
|
$fortran_points += 100;
|
|
# don't end here; try to get 77 or 90 flavor
|
|
} elsif (/^\s*end;/i ) {
|
|
++$pascal_points;
|
|
last;
|
|
} elsif (/^\s*program\b/i) {
|
|
++$fortran_points;
|
|
++$pascal_points;
|
|
# 'function' is common to all three languages so ignore it
|
|
} elsif (/^\s*(subroutine|module|stop)\b/i or
|
|
/^\s*write\s*\(/i or
|
|
/^\s*end\s+(program|subroutine)/i or
|
|
/^\s*end\s*(if|do)\b/i) {
|
|
++$fortran_points;
|
|
} elsif (/^\s*(writeln|begin|procedure|interface|implementation|const|var)\b/i) {
|
|
++$pascal_points;
|
|
} elsif (/^\s*(<|\?>)/) {
|
|
++$php_points;
|
|
} elsif (/;\s*$/) {
|
|
++$pascal_points;
|
|
++$php_points;
|
|
}
|
|
if (/^\s*(integer|real|complex|double|character|logical|public|private).*?::/i or
|
|
/^\s*(allocatable|dimension)/i or
|
|
/^\s*(end\s+)?(interface|type|function|module)\b/i) {
|
|
++$fortran_90;
|
|
}
|
|
}
|
|
$IN->close;
|
|
|
|
my %points = ( 'Fortran' => $fortran_points ,
|
|
'PHP' => $php_points ,
|
|
'Pascal' => $pascal_points , );
|
|
|
|
${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
|
|
if (${$rs_language} eq 'Fortran') {
|
|
if ($fortran_90) {
|
|
${$rs_language} = 'Fortran 90';
|
|
} else {
|
|
${$rs_language} = 'Fortran 77';
|
|
}
|
|
}
|
|
|
|
print "<- php_pascal_or_fortran($file: fortran=$fortran_points, php=$php_points, pascal=$pascal_points) => ${$rs_language}\n"
|
|
if $opt_v > 2;
|
|
|
|
} # 1}}}
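# Worked examples (illustration only): a file containing '<?php' is scored
# PHP (+100) on sight, a line consisting of 'end.' scores Pascal (+100),
# and 'implicit none' scores Fortran (+100); when Fortran wins, the
# presence of '::' declarations or 'end module'/'interface'/'type' lines
# selects 'Fortran 90', otherwise 'Fortran 77'.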
|
|
sub Lisp_or_OpenCL { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Lisp_or_OpenCL\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $lisp_points = 0;
|
|
my $opcl_points = 0;
|
|
while (<$IN>) {
|
|
++$lisp_points if /^\s*;/;
|
|
++$lisp_points if /\((def|eval|require|export|let|loop|dec|format)/;
|
|
++$opcl_points if /^\s*(int|float|const|{)/;
|
|
}
|
|
$IN->close;
|
|
# print "lisp_points=$lisp_points opcl_points=$opcl_points\n";
|
|
if ($lisp_points > $opcl_points) {
|
|
$lang = "Lisp";
|
|
} else {
|
|
$lang = "OpenCL";
|
|
}
|
|
|
|
print "<- Lisp_or_OpenCL\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Lisp_or_Julia { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Lisp_or_Julia\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $lisp_points = 0;
|
|
my $julia_points = 0;
|
|
while (<$IN>) {
|
|
++$lisp_points if /^\s*;/;
|
|
++$lisp_points if /\((def|eval|require|export|let|loop|dec|format)/;
|
|
++$julia_points if /^\s*(function|end|println|for|while)/;
|
|
}
|
|
$IN->close;
|
|
# print "lisp_points=$lisp_points julia_points=$julia_points\n";
|
|
if ($lisp_points > $julia_points) {
|
|
$lang = "Lisp";
|
|
} else {
|
|
$lang = "Julia";
|
|
}
|
|
|
|
print "<- Lisp_or_Julia\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Perl_or_Prolog { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Perl_or_Prolog\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $perl_points = 0;
|
|
my $prolog_points = 0;
|
|
while (<$IN>) {
|
|
next if /^\s*$/;
|
|
if ($. == 1 and /^#!.*?\bperl/) {
|
|
$perl_points = 100;
|
|
last;
|
|
}
|
|
++$perl_points if /^=(head|over|item|cut)/;
|
|
++$perl_points if /;\s*$/;
|
|
++$perl_points if /(\{|\})/;
|
|
++$perl_points if /^\s*sub\s+/;
|
|
++$perl_points if /\s*<<'/; # start HERE block
|
|
++$perl_points if /\$(\w+\->|[_!])/;
|
|
++$prolog_points if !/\s*#/ and /\.\s*$/;
|
|
++$prolog_points if /:-/;
|
|
}
|
|
$IN->close;
|
|
# print "perl_points=$perl_points prolog_points=$prolog_points\n";
|
|
if ($perl_points > $prolog_points) {
|
|
$lang = "Perl";
|
|
} else {
|
|
$lang = "Prolog";
|
|
}
|
|
|
|
printf "<- Perl_or_Prolog(%s, Perl=%d Prolog=%d)\n",
|
|
$file, $perl_points, $prolog_points if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
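# Worked example (illustration only): a file whose first line is
# '#!/usr/bin/env perl' gets perl_points = 100 immediately; otherwise
# lines ending in ';', lines containing braces, 'sub foo' definitions and
# '$self->' style variables favour Perl, while clause-terminating '.'
# lines and ':-' rules favour Prolog; a tie goes to Prolog.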
|
|
sub Raku_or_Prolog { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Raku_or_Prolog\n" if $opt_v > 2;
|
|
my $lang = Perl_or_Prolog($file, $rh_Err, $raa_errors);
|
|
$lang = "Raku" if $lang eq "Perl";
|
|
|
|
print "<- Raku_or_Prolog\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub IDL_or_QtProject { # {{{1
|
|
# IDL, QtProject, Prolog, or ProGuard
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> IDL_or_QtProject($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $idl_points = 0;
|
|
my $qtproj_points = 0;
|
|
my $prolog_points = 0;
|
|
my $proguard_points = 0;
|
|
while (<$IN>) {
|
|
++$idl_points if /^\s*;/;
|
|
++$idl_points if /plot\(/i;
|
|
++$qtproj_points if /^\s*(qt|configs|sources|template|target|targetpath|subdirs)\b/i;
|
|
++$qtproj_points if /qthavemodule/i;
|
|
++$prolog_points if /\.\s*$/;
|
|
++$prolog_points if /:-/;
|
|
++$proguard_points if /^\s*#/;
|
|
++$proguard_points if /^-keep/;
|
|
++$proguard_points if /^-(dont)?obfuscate/;
|
|
}
|
|
$IN->close;
|
|
# print "idl_points=$idl_points qtproj_points=$qtproj_points\n";
|
|
|
|
my %points = ( 'IDL' => $idl_points ,
|
|
'Qt Project' => $qtproj_points ,
|
|
'Prolog' => $prolog_points ,
|
|
'ProGuard' => $proguard_points ,
|
|
);
|
|
|
|
$lang = (sort { $points{$b} <=> $points{$a} or $a cmp $b} keys %points)[0];
|
|
|
|
print "<- IDL_or_QtProject(idl_points=$idl_points, ",
|
|
"qtproj_points=$qtproj_points, prolog_points=$prolog_points, ",
|
|
"proguard_points=$proguard_points)\n"
|
|
if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
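# Worked example (illustration only): 'SOURCES += main.cpp' or
# 'QT += widgets' lines add qtproj_points, ';' comment lines and 'plot('
# calls add idl_points, '#' comment lines and '-keep ...' rules add
# proguard_points, and '.'-terminated or ':-' lines add prolog_points;
# the highest total wins, with ties broken by alphabetical order of the
# language name.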
|
|
sub Ant_or_XML { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Ant_or_XML($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = "XML";
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $Ant_points = 0;
|
|
my $XML_points = 1;
|
|
while (<$IN>) {
|
|
if (/^\s*<project\s+/) {
|
|
++$Ant_points ;
|
|
--$XML_points ;
|
|
}
|
|
if (/xmlns:artifact="antlib:org.apache.maven.artifact.ant"/) {
|
|
++$Ant_points ;
|
|
--$XML_points ;
|
|
}
|
|
}
|
|
$IN->close;
|
|
|
|
if ($XML_points >= $Ant_points) {
|
|
# tie or better goes to XML
|
|
$lang = "XML";
|
|
} else {
|
|
$lang = "Ant";
|
|
}
|
|
|
|
print "<- Ant_or_XML($lang)\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Maven_or_XML { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Maven_or_XML($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = "XML";
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $Mvn_points = 0;
|
|
my $XML_points = 1;
|
|
while (<$IN>) {
|
|
if (/^\s*<project\s+/) {
|
|
++$Mvn_points ;
|
|
--$XML_points ;
|
|
}
|
|
if (m{xmlns="http://maven.apache.org/POM/}) {
|
|
++$Mvn_points ;
|
|
--$XML_points ;
|
|
}
|
|
}
|
|
$IN->close;
|
|
|
|
if ($XML_points >= $Mvn_points) {
|
|
# tie or better goes to XML
|
|
$lang = "XML";
|
|
} else {
|
|
$lang = "Maven";
|
|
}
|
|
|
|
print "<- Maven_or_XML($lang)\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub pascal_or_puppet { # {{{1
|
|
# Decide if code is Pascal or Puppet manifest
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rs_language , # out
|
|
) = @_;
|
|
|
|
print "-> pascal_or_puppet\n" if $opt_v > 2;
|
|
|
|
${$rs_language} = "";
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return;
|
|
}
|
|
|
|
my $DEBUG = 0;
|
|
my $pascal_points = 0;
|
|
my $puppet_points = 0;
|
|
|
|
while (<$IN>) {
|
|
|
|
if ( /^\s*\#\s+/ ) {
|
|
$puppet_points += .001;
|
|
next;
|
|
}
|
|
|
|
++$pascal_points if /\bprogram\s+[A-Za-z]/i;
|
|
++$pascal_points if /\bunit\s+[A-Za-z]/i;
|
|
++$pascal_points if /\bmodule\s+[A-Za-z]/i;
|
|
++$pascal_points if /\bprocedure\b/i;
|
|
++$pascal_points if /\bfunction\b/i;
|
|
++$pascal_points if /^\s*interface\s+/i;
|
|
++$pascal_points if /^\s*implementation\s+/i;
|
|
++$pascal_points if /^\s*uses\s+/i;
|
|
++$pascal_points if /(?<!\:\:)\bbegin\b(?!\:\:)/i;
|
|
++$pascal_points if /(?<!\:\:)\bend\b(?!\:\:)/i;
|
|
++$pascal_points if /\:\=/;
|
|
++$pascal_points if /\<\>/;
|
|
++$pascal_points if /^\s*\{\$(I|INCLUDE)\s+.*\}/i;
|
|
++$pascal_points if /writeln/;
|
|
|
|
++$puppet_points if /^\s*class\s+/ and not /class\s+operator\s+/;
|
|
++$puppet_points if /^\s*function\s+[a-z][a-z0-9]+::[a-z][a-z0-9]+\s*/;
|
|
++$puppet_points if /^\s*type\s+[A-Z]\w+::[A-Z]\w+\s+/;
|
|
++$puppet_points if /^\s*case\s+/;
|
|
++$puppet_points if /^\s*package\s+/;
|
|
++$puppet_points if /^\s*file\s+/;
|
|
++$puppet_points if /^\s*include\s\w+/;
|
|
++$puppet_points if /^\s*service\s+/;
|
|
++$puppet_points if /\s\$\w+\s*\=\s*\S/;
|
|
++$puppet_points if /\S\s*\=\>\s*\S/;
|
|
|
|
# No need to process rest of file if language seems obvious.
|
|
last
|
|
if (abs ($pascal_points - $puppet_points ) > 20 );
|
|
}
|
|
$IN->close;
|
|
|
|
print "<- pascal_or_puppet(pascal=$pascal_points, puppet=$puppet_points)\n"
|
|
if $opt_v > 2;
|
|
|
|
if ($pascal_points > $puppet_points) {
|
|
${$rs_language} = "Pascal";
|
|
} else {
|
|
${$rs_language} = "Puppet";
|
|
}
|
|
|
|
} # 1}}}
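# Worked example (illustration only): Pascal-leaning lines such as
# 'procedure Foo;', ':=' assignments and begin/end blocks raise
# pascal_points, while Puppet-leaning lines such as 'class nginx {',
# 'package { ... ensure => installed }' and '=>' attributes raise
# puppet_points; plain '# ...' comment lines nudge the score toward Puppet
# by 0.001 each, and scanning stops early once the scores differ by more
# than 20.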
|
|
sub Forth_or_Fortran { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Forth_or_Fortran\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $forth_points = 0;
|
|
my $fortran_points = 0;
|
|
while (<$IN>) {
|
|
++$forth_points if /^:\s/;
|
|
++$fortran_points if /^([c*][^a-z]|\s{6,}(subroutine|program|end|implicit)\s|\s*!)/i;
|
|
}
|
|
$IN->close;
|
|
if ($forth_points > $fortran_points) {
|
|
$lang = "Forth";
|
|
} else {
|
|
$lang = "Fortran 77";
|
|
}
|
|
|
|
print "<- Forth_or_Fortran\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Forth_or_Fsharp { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Forth_or_Fsharp\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $forth_points = 0;
|
|
my $fsharp_points = 0;
|
|
while (<$IN>) {
|
|
++$forth_points if /^:\s/;
|
|
++$fsharp_points if /^\s*(#light|import|let|module|namespace|open|type)/;
|
|
}
|
|
$IN->close;
|
|
if ($forth_points > $fsharp_points) {
|
|
$lang = "Forth";
|
|
} else {
|
|
$lang = "F#";
|
|
}
|
|
|
|
print "<- Forth_or_Fsharp\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Verilog_or_Coq { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Verilog_or_Coq\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $coq_points = 0;
|
|
my $verilog_points = 0;
|
|
while (<$IN>) {
|
|
++$verilog_points if /^\s*(module|begin|input|output|always)/;
|
|
++$coq_points if /\b(Inductive|Fixpoint|Definition|
|
|
Theorem|Lemma|Proof|Qed|forall|
|
|
Section|Check|Notation|Variable|
|
|
Goal|Fail|Require|Scheme|Module|Ltac|
|
|
Set|Unset|Parameter|Coercion|Axiom|
|
|
Locate|Type|Record|Existing|Class)\b/x;
|
|
}
|
|
$IN->close;
|
|
if ($coq_points > $verilog_points) {
|
|
$lang = "Coq";
|
|
} else {
|
|
$lang = "Verilog-SystemVerilog";
|
|
}
|
|
|
|
print "<- Verilog_or_Coq\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub TypeScript_or_QtLinguist { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> TypeScript_or_QtLinguist\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $tscript_points = 0;
|
|
my $linguist_points = 0;
|
|
while (<$IN>) {
|
|
++$linguist_points if m{\b</?(message|source|translation)>};
|
|
++$tscript_points if /^\s*(var|const|let|class|document)\b/;
|
|
++$tscript_points if /[;}]\s*$/;
|
|
++$tscript_points if m{^\s*//};
|
|
}
|
|
$IN->close;
|
|
if ($tscript_points >= $linguist_points) {
|
|
$lang = "TypeScript";
|
|
} else {
|
|
$lang = "Qt Linguist";
|
|
}
|
|
print "<- TypeScript_or_QtLinguist\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Qt_or_Glade { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Qt_or_Glade\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $qt_points = 1;
|
|
my $glade_points = -1;
|
|
while (<$IN>) {
|
|
if (/generated\s+with\s+glade/i) {
|
|
$glade_points = 1;
|
|
$qt_points = -1;
|
|
last;
|
|
}
|
|
}
|
|
$IN->close;
|
|
if ($glade_points > $qt_points) {
|
|
$lang = "Glade";
|
|
} else {
|
|
$lang = "Qt";
|
|
}
|
|
print "<- Qt_or_Glade\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Csharp_or_Smalltalk { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Csharp_or_Smalltalk($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $cs_points = 0;
|
|
my $smalltalk_points = 0;
|
|
while (<$IN>) {
|
|
        s{//.*?$}{}; # strip trailing // comments so they do not skew the scoring below
|
|
next if /^\s*$/;
|
|
if (/[;}{]\s*$/) {
|
|
++$cs_points ;
|
|
} elsif (/^(using|namespace)\s/) {
|
|
$cs_points += 20;
|
|
} elsif (/^\s*(public|private|new)\s/) {
|
|
$cs_points += 20;
|
|
} elsif (/^\s*\[assembly:/) {
|
|
++$cs_points ;
|
|
}
|
|
if (/(\!|\]\.)\s*$/) {
|
|
++$smalltalk_points;
|
|
--$cs_points ;
|
|
}
|
|
}
|
|
$IN->close;
|
|
if ($smalltalk_points > $cs_points) {
|
|
$lang = "Smalltalk";
|
|
} else {
|
|
$lang = "C#";
|
|
}
|
|
print "<- Csharp_or_Smalltalk($file)=$lang\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Visual_Basic_or_TeX_or_Apex { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
$rs_language , # out
|
|
) = @_;
|
|
|
|
print "-> Visual_Basic_or_TeX_or_Apex($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $VB_points = 0;
|
|
my $tex_points = 0;
|
|
my $apex_points = 0;
|
|
while (<$IN>) {
|
|
next if /^\s*$/;
|
|
#print "$_";
|
|
if (/\s*%/ or /\s*\\/) {
|
|
++$tex_points ;
|
|
} else {
|
|
if (/^\s*(public|private)\s/i) {
|
|
++$VB_points ;
|
|
++$apex_points ;
|
|
#print "+VB1 +A1";
|
|
} elsif (/^\s*(end|attribute|version)\s/i) {
|
|
++$VB_points ;
|
|
#print "+VB2";
|
|
}
|
|
if (/[{}]/ or /;\s*$/) {
|
|
++$apex_points ;
|
|
#print "+A2";
|
|
}
|
|
#print "\n";
|
|
}
|
|
}
|
|
$IN->close;
|
|
|
|
my %points = ( 'Visual Basic' => $VB_points ,
|
|
'TeX' => $tex_points ,
|
|
'Apex Class' => $apex_points ,);
|
|
|
|
${$rs_language} = (sort { $points{$b} <=> $points{$a} or $a cmp $b } keys %points)[0];
|
|
|
|
print "<- Visual_Basic_or_TeX_or_Apex($file: VB=$VB_points, TeX=$tex_points, Apex=$apex_points\n" if $opt_v > 2;
|
|
return $lang;
|
|
} # 1}}}
|
|
sub Scheme_or_SaltStack { # {{{1
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
|
|
print "-> Scheme_or_SaltStack($file)\n" if $opt_v > 2;
|
|
|
|
my $lang = undef;
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file];
|
|
return $lang;
|
|
}
|
|
my $Sch_points = 0;
|
|
my $SS_points = 0;
|
|
while (<$IN>) {
|
|
next if /^\s*$/;
|
|
if (/{\%.*%}/) {
|
|
$SS_points += 5;
|
|
} elsif (/map\.jinja\b/) {
|
|
$SS_points += 5;
|
|
} elsif (/\((define|lambda|let|cond|do)\s/) {
|
|
$Sch_points += 1;
|
|
} else {
|
|
}
|
|
}
|
|
$IN->close;
|
|
|
|
print "<- Scheme_or_SaltStack($file: Scheme=$Sch_points, SaltStack=$SS_points\n" if $opt_v > 2;
|
|
if ($Sch_points > $SS_points) {
|
|
return "Scheme";
|
|
} else {
|
|
return "SaltStack";
|
|
}
|
|
} # 1}}}
|
|
sub html_colored_text { # {{{1
|
|
# http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif
|
|
my ($color, $text) = @_;
|
|
#?#die "html_colored_text($text)";
|
|
if ($color =~ /^red$/i) {
|
|
$color = "#ff0000";
|
|
} elsif ($color =~ /^green$/i) {
|
|
$color = "#00ff00";
|
|
} elsif ($color =~ /^blue$/i) {
|
|
$color = "#0000ff";
|
|
} elsif ($color =~ /^grey$/i) {
|
|
$color = "#cccccc";
|
|
}
|
|
# return "" unless $text;
|
|
return '<font color="' . $color . '">' . html_metachars($text) . "</font>";
|
|
} # 1}}}
|
|
sub xml_metachars { # {{{1
|
|
# http://en.wikipedia.org/wiki/Character_encodings_in_HTML#XML_character_references
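    # For example (illustration only):
    #     xml_metachars(q{a<b & "c"})  returns  'a&lt;b &amp; &quot;c&quot;'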
|
|
my ($string, ) = shift @_;
|
|
|
|
my @in_chars = split(//, $string);
|
|
my @out_chars = ();
|
|
foreach my $c (@in_chars) {
|
|
        if ($c eq '&') { push @out_chars, '&amp;'
        } elsif ($c eq '<') { push @out_chars, '&lt;'
        } elsif ($c eq '>') { push @out_chars, '&gt;'
        } elsif ($c eq '"') { push @out_chars, '&quot;'
        } elsif ($c eq "'") { push @out_chars, '&apos;'
|
|
} else {
|
|
push @out_chars, $c;
|
|
}
|
|
}
|
|
return join "", @out_chars;
|
|
} # 1}}}
|
|
sub html_metachars { # {{{1
|
|
# Replace HTML metacharacters with their printable forms.
|
|
# Future: use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm
|
|
# from Fabiano Reese Righetti's HTML::Encoder module if
|
|
# this subroutine proves to be too simplistic.
|
|
my ($string, ) = shift @_;
|
|
|
|
my @in_chars = split(//, $string);
|
|
my @out_chars = ();
|
|
foreach my $c (@in_chars) {
|
|
        if ($c eq '<') {
            push @out_chars, '&lt;'
        } elsif ($c eq '>') {
            push @out_chars, '&gt;'
        } elsif ($c eq '&') {
            push @out_chars, '&amp;'
|
|
} else {
|
|
push @out_chars, $c;
|
|
}
|
|
}
|
|
return join "", @out_chars;
|
|
} # 1}}}
|
|
sub test_alg_diff { # {{{1
|
|
my ($file_1 ,
|
|
$file_2 )
|
|
= @_;
|
|
my $fh_1 = open_file('<', $file_1, 1);
|
|
die "Unable to read $file_1: $!\n" unless defined $fh_1;
|
|
chomp(my @lines_1 = <$fh_1>);
|
|
$fh_1->close;
|
|
|
|
my $fh_2 = open_file('<', $file_2, 1);
|
|
die "Unable to read $file_2: $!\n" unless defined $fh_2;
|
|
chomp(my @lines_2 = <$fh_2>);
|
|
$fh_2->close;
|
|
|
|
my $n_no_change = 0;
|
|
my $n_modified = 0;
|
|
my $n_added = 0;
|
|
my $n_deleted = 0;
|
|
my @min_sdiff = ();
|
|
my $NN = chr(27) . "[0m"; # normal
|
|
my $BB = chr(27) . "[1m"; # bold
|
|
|
|
my @sdiffs = sdiff( \@lines_1, \@lines_2 );
|
|
foreach my $entry (@sdiffs) {
|
|
my ($out_1, $out_2) = ('', '');
|
|
if ($entry->[0] eq 'u') {
|
|
++$n_no_change;
|
|
# $out_1 = $entry->[1];
|
|
# $out_2 = $entry->[2];
|
|
next;
|
|
}
|
|
# push @min_sdiff, $entry;
|
|
if ($entry->[0] eq 'c') {
|
|
++$n_modified;
|
|
($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]);
|
|
$out_1 =~ s/\cA(\w)/${BB}$1${NN}/g;
|
|
$out_2 =~ s/\cA(\w)/${BB}$1${NN}/g;
|
|
# $out_1 =~ s/\cA//g;
|
|
# $out_2 =~ s/\cA//g;
|
|
} elsif ($entry->[0] eq '+') {
|
|
++$n_added;
|
|
$out_1 = $entry->[1];
|
|
$out_2 = $entry->[2];
|
|
} elsif ($entry->[0] eq '-') {
|
|
++$n_deleted;
|
|
$out_1 = $entry->[1];
|
|
$out_2 = $entry->[2];
|
|
} elsif ($entry->[0] eq 'u') {
|
|
} else { die "unknown entry->[0]=[$entry->[0]]\n"; }
|
|
printf "%-80s | %s\n", $out_1, $out_2;
|
|
}
|
|
|
|
# foreach my $entry (@min_sdiff) {
|
|
# printf "DIFF: %s %s %s\n", @{$entry};
|
|
# }
|
|
} # 1}}}
|
|
sub write_comments_to_html { # {{{1
|
|
my ($filename , # in
|
|
$rah_diff_L , # in see routine array_diff() for explanation
|
|
$rah_diff_R , # in see routine array_diff() for explanation
|
|
$rh_blank , # in location and counts of blank lines
|
|
) = @_;
|
|
|
|
print "-> write_comments_to_html($filename)\n" if $opt_v > 2;
|
|
my $file = $filename . ".html";
|
|
|
|
my $approx_line_count = scalar @{$rah_diff_L};
|
|
$approx_line_count = 1 unless $approx_line_count;
|
|
my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10
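    # For example (illustration only), a 150-line file gives
    # 1 + int(log(150)/log(10)) = 1 + int(2.176) = 3 digit line numbers.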
|
|
|
|
my $html_out = html_header($filename);
|
|
|
|
my $comment_line_number = 0;
|
|
for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) {
|
|
if (defined $rh_blank->{$i}) {
|
|
foreach (1..$rh_blank->{$i}) {
|
|
$html_out .= "<!-- blank -->\n";
|
|
}
|
|
}
|
|
my $line_num = "";
|
|
my $pre = "";
|
|
my $post = '</span> ';
|
|
warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type};
|
|
if ($rah_diff_R->[$i]{type} eq 'nonexist') {
|
|
++$comment_line_number;
|
|
            $line_num = sprintf "\&nbsp;<span class=\"clinenum\"> %0${n_digits}d %s",
|
|
$comment_line_number, $post;
|
|
$pre = '<span class="comment">';
|
|
$html_out .= $line_num;
|
|
$html_out .= $pre .
|
|
html_metachars($rah_diff_L->[$i]{char}) .
|
|
$post . "\n";
|
|
next;
|
|
}
|
|
if ($rah_diff_R->[$i]{type} eq 'code' and
|
|
$rah_diff_R->[$i]{desc} eq 'same') {
|
|
# entire line remains as-is
|
|
            $line_num = sprintf "\&nbsp;<span class=\"linenum\"> %0${n_digits}d %s",
|
|
$rah_diff_R->[$i]{lnum}, $post;
|
|
$pre = '<span class="normal">';
|
|
$html_out .= $line_num;
|
|
$html_out .= $pre .
|
|
html_metachars($rah_diff_R->[$i]{char}) . $post;
|
|
#XX } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments
|
|
#XX
|
|
#XX $line_num = '<span class="linenum">' .
|
|
#XX $rah_diff_R->[$i]{lnum} . $post;
|
|
#XX $html_out .= $line_num;
|
|
#XX
|
|
#XX my @strings = @{$rah_diff_R->[$i]{char}{strings}};
|
|
#XX my @type = @{$rah_diff_R->[$i]{char}{type}};
|
|
#XX for (my $i = 0; $i < scalar @strings; $i++) {
|
|
#XX if ($type[$i] eq 'u') {
|
|
#XX $pre = '<span class="normal">';
|
|
#XX } else {
|
|
#XX $pre = '<span class="comment">';
|
|
#XX }
|
|
#XX $html_out .= $pre . html_metachars($strings[$i]) . $post;
|
|
#XX }
|
|
# print Dumper(@strings, @type); die;
|
|
|
|
} elsif ($rah_diff_R->[$i]{type} eq 'comment') {
|
|
$line_num = '<span class="clinenum">' . $comment_line_number . $post;
|
|
# entire line is a comment
|
|
$pre = '<span class="comment">';
|
|
$html_out .= $pre .
|
|
html_metachars($rah_diff_R->[$i]{char}) . $post;
|
|
}
|
|
#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2;
|
|
$html_out .= "\n";
|
|
}
|
|
|
|
$html_out .= html_end();
|
|
|
|
my $out_file = "$filename.html";
|
|
write_file($out_file, {}, ( $html_out ) );
|
|
|
|
print "<- write_comments_to_html\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub array_diff { # {{{1
|
|
my ($file , # in only used for error reporting
|
|
$ra_lines_L , # in array of lines in Left file (no blank lines)
|
|
$ra_lines_R , # in array of lines in Right file (no blank lines)
|
|
$mode , # in "comment" | "revision"
|
|
$rah_diff_L , # out
|
|
$rah_diff_R , # out
|
|
$raa_Errors , # in/out
|
|
) = @_;
|
|
|
|
# This routine operates in two ways:
|
|
# A. Computes diffs of the same file with and without comments.
|
|
# This is used to classify lines as code, comments, or blank.
|
|
# B. Computes diffs of two revisions of a file. This method
|
|
# requires a prior run of method A using the older version
|
|
# of the file because it needs lines to be classified.
|
|
|
|
# $rah_diff structure:
|
|
# An array with n entries where n equals the number of lines in
|
|
# an sdiff of the two files. Each entry in the array describes
|
|
# the contents of the corresponding line in file Left and file Right:
|
|
# diff[]{type} = blank | code | code+comment | comment | nonexist
|
|
# {lnum} = line number within the original file (1-based)
|
|
# {desc} = same | added | removed | modified
|
|
# {char} = the input line unless {desc} = 'modified' in
|
|
# which case
|
|
# {char}{strings} = [ substrings ]
|
|
# {char}{type} = [ disposition (added, removed, etc)]
|
|
#
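    # A sketch of one such entry (illustration only; the values are
    # hypothetical):
    #   $rah_diff_R->[7] = {
    #       type => 'code',       # blank | code | code+comment | comment | nonexist
    #       lnum => 12,           # 1-based line number in the original file
    #       desc => 'same',       # same | added | removed | modified
    #       char => '$x += 1;',   # the input line itself
    #   };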
|
|
|
|
@{$rah_diff_L} = ();
|
|
@{$rah_diff_R} = ();
|
|
|
|
print "-> array_diff()\n" if $opt_v > 2;
|
|
my $COMMENT_MODE = 0;
|
|
$COMMENT_MODE = 1 if $mode eq "comment";
|
|
|
|
#print "array_diff(mode=$mode)\n";
|
|
#print Dumper("block left:" , $ra_lines_L);
|
|
#print Dumper("block right:", $ra_lines_R);
|
|
|
|
my @sdiffs = ();
|
|
eval {
|
|
local $SIG{ALRM} = sub { die "alarm\n" };
|
|
alarm $opt_diff_timeout;
|
|
@sdiffs = sdiff($ra_lines_L, $ra_lines_R);
|
|
alarm 0;
|
|
};
|
|
if ($@) {
|
|
# timed out
|
|
die unless $@ eq "alarm\n"; # propagate unexpected errors
|
|
push @{$raa_Errors},
|
|
[ $Error_Codes{'Diff error, exceeded timeout'}, $file ];
|
|
if ($opt_v) {
|
|
warn "array_diff: diff timeout failure for $file--ignoring\n";
|
|
}
|
|
return;
|
|
}
|
|
|
|
my $n_L = 0;
|
|
my $n_R = 0;
|
|
my $n_sdiff = 0; # index to $rah_diff_L, $rah_diff_R
|
|
foreach my $triple (@sdiffs) {
|
|
my $flag = $triple->[0];
|
|
my $line_L = $triple->[1];
|
|
my $line_R = $triple->[2];
|
|
$rah_diff_L->[$n_sdiff]{char} = $line_L;
|
|
$rah_diff_R->[$n_sdiff]{char} = $line_R;
|
|
if ($flag eq 'u') { # u = unchanged
|
|
++$n_L;
|
|
++$n_R;
|
|
if ($COMMENT_MODE) {
|
|
# line exists in both with & without comments, must be code
|
|
$rah_diff_L->[$n_sdiff]{type} = "code";
|
|
$rah_diff_R->[$n_sdiff]{type} = "code";
|
|
}
|
|
$rah_diff_L->[$n_sdiff]{desc} = "same";
|
|
$rah_diff_R->[$n_sdiff]{desc} = "same";
|
|
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
} elsif ($flag eq 'c') { # c = changed
|
|
# warn "per line sdiff() commented out\n"; if (0) {
|
|
++$n_L;
|
|
++$n_R;
|
|
|
|
if ($COMMENT_MODE) {
|
|
# line has text both with & without comments;
|
|
# count as code
|
|
$rah_diff_L->[$n_sdiff]{type} = "code";
|
|
$rah_diff_R->[$n_sdiff]{type} = "code";
|
|
}
|
|
|
|
my @chars_L = split '', $line_L;
|
|
my @chars_R = split '', $line_R;
|
|
|
|
$rah_diff_L->[$n_sdiff]{desc} = "modified";
|
|
$rah_diff_R->[$n_sdiff]{desc} = "modified";
|
|
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
|
|
} elsif ($flag eq '+') { # + = added
|
|
++$n_R;
|
|
if ($COMMENT_MODE) {
|
|
# should never get here, but may due to sdiff() bug,
|
|
# ref https://rt.cpan.org/Public/Bug/Display.html?id=131629
|
|
# Rather than failing, ignore and continue. A possible
|
|
# consequence is counts may be inconsistent.
|
|
##### @{$rah_diff_L} = ();
|
|
##### @{$rah_diff_R} = ();
|
|
##### push @{$raa_Errors},
|
|
##### [ $Error_Codes{'Diff error (quoted comments?)'}, $file ];
|
|
if ($opt_v) {
|
|
warn "array_diff: diff failure (diff says the\n";
|
|
warn "comment-free file has added lines).\n";
|
|
warn "$n_sdiff $line_L\n";
|
|
}
|
|
}
|
|
#### $rah_diff_L->[$n_sdiff]{type} = "nonexist";
|
|
$rah_diff_L->[$n_sdiff]{type} = "comment";
|
|
$rah_diff_L->[$n_sdiff]{desc} = "removed";
|
|
$rah_diff_R->[$n_sdiff]{desc} = "added";
|
|
$rah_diff_R->[$n_sdiff]{lnum} = $n_R;
|
|
} elsif ($flag eq '-') { # - = removed
|
|
++$n_L;
|
|
if ($COMMENT_MODE) {
|
|
# line must be comment because blanks already gone
|
|
$rah_diff_L->[$n_sdiff]{type} = "comment";
|
|
}
|
|
$rah_diff_R->[$n_sdiff]{type} = "nonexist";
|
|
$rah_diff_R->[$n_sdiff]{desc} = "removed";
|
|
$rah_diff_L->[$n_sdiff]{desc} = "added";
|
|
$rah_diff_L->[$n_sdiff]{lnum} = $n_L;
|
|
}
|
|
#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R;
|
|
++$n_sdiff;
|
|
}
|
|
#use Data::Dumper::Simple;
|
|
#print Dumper($rah_diff_L, $rah_diff_R);
|
|
#print Dumper($rah_diff_L);
|
|
|
|
print "<- array_diff\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub remove_leading_dir { # {{{1
|
|
my @filenames = @_;
|
|
#
|
|
# Input should be a list of file names
|
|
# with the same leading directory such as
|
|
#
|
|
# dir1/dir2/a.txt
|
|
# dir1/dir2/b.txt
|
|
# dir1/dir2/dir3/c.txt
|
|
#
|
|
# Output is the same list minus the common
|
|
# directory path:
|
|
#
|
|
# a.txt
|
|
# b.txt
|
|
# dir3/c.txt
|
|
#
|
|
print "-> remove_leading_dir()\n" if $opt_v > 2;
|
|
my @D = (); # a matrix: [ [ dir1, dir2 ], # dir1/dir2/a.txt
|
|
# [ dir1, dir2 ], # dir1/dir2/b.txt
|
|
# [ dir1, dir2 , dir3] ] # dir1/dir2/dir3/c.txt
|
|
if ($ON_WINDOWS) {
|
|
foreach my $F (@filenames) {
|
|
$F =~ s{\\}{/}g;
|
|
$F = ucfirst($F) if $F =~ /^\w:/; # uppercase drive letter
|
|
}
|
|
}
|
|
if (scalar @filenames == 1) {
|
|
# special case: with only one filename
|
|
# cannot determine a baseline, just remove first directory level
|
|
$filenames[0] =~ s{^.*?/}{};
|
|
# print "-> $filenames[0]\n";
|
|
return $filenames[0];
|
|
}
|
|
foreach my $F (@filenames) {
|
|
my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
|
|
my @x = File::Spec->splitdir( $Dir );
|
|
pop @x unless $x[$#x]; # last entry usually null, remove it
|
|
if ($ON_WINDOWS) {
|
|
if (defined($Vol) and $Vol) {
|
|
# put the drive letter, eg, C:, at the front
|
|
unshift @x, uc $Vol;
|
|
}
|
|
}
|
|
#print "F=$F, Dir=$Dir x=[", join("][", @x), "]\n";
|
|
push @D, [ @x ];
|
|
}
|
|
|
|
# now loop over columns until either they are all
|
|
# eliminated or a unique column is found
|
|
|
|
my @common = (); # to contain the common leading directories
|
|
my $mismatch = 0;
|
|
while (!$mismatch) {
|
|
for (my $row = 1; $row < scalar @D; $row++) {
|
|
#print "comparing $D[$row][0] to $D[0][0]\n";
|
|
|
|
if (!defined $D[$row][0] or !defined $D[0][0] or
|
|
($D[$row][0] ne $D[0][0])) {
|
|
$mismatch = 1;
|
|
last;
|
|
}
|
|
}
|
|
#print "mismatch=$mismatch\n";
|
|
if (!$mismatch) {
|
|
push @common, $D[0][0];
|
|
            # every row matches on this leading term; shift it off each row
|
|
foreach my $ra (@D) {
|
|
shift @{$ra};
|
|
}
|
|
}
|
|
}
|
|
|
|
push @common, " "; # so that $leading will end with "/ "
|
|
my $leading = File::Spec->catdir( @common );
|
|
$leading =~ s{ $}{}; # now take back the bogus appended space
|
|
#print "remove_leading_dir leading=[$leading]\n"; die;
|
|
if ($ON_WINDOWS) {
|
|
$leading =~ s{\\}{/}g;
|
|
}
|
|
foreach my $F (@filenames) {
|
|
$F =~ s{^$leading}{};
|
|
}
|
|
|
|
print "<- remove_leading_dir()\n" if $opt_v > 2;
|
|
return @filenames;
|
|
|
|
} # 1}}}
|
|
sub strip_leading_dir { # {{{1
|
|
my ($leading, @filenames) = @_;
|
|
# removes the string $leading from each entry in @filenames
|
|
print "-> strip_leading_dir()\n" if $opt_v > 2;
|
|
|
|
#print "remove_leading_dir leading=[$leading]\n"; die;
|
|
if ($ON_WINDOWS) {
|
|
$leading =~ s{\\}{/}g;
|
|
foreach my $F (@filenames) {
|
|
$F =~ s{\\}{/}g;
|
|
}
|
|
}
|
|
foreach my $F (@filenames) {
|
|
#print "strip_leading_dir F before $F\n";
|
|
if ($ON_WINDOWS) {
|
|
$F =~ s{^$leading}{}i;
|
|
} else {
|
|
$F =~ s{^$leading}{};
|
|
}
|
|
#print "strip_leading_dir F after $F\n";
|
|
}
|
|
|
|
print "<- strip_leading_dir()\n" if $opt_v > 2;
|
|
return @filenames;
|
|
|
|
} # 1}}}
|
|
sub find_deepest_file { # {{{1
|
|
my @filenames = @_;
|
|
#
|
|
# Input should be a list of file names
|
|
# with the same leading directory such as
|
|
#
|
|
# dir1/dir2/a.txt
|
|
# dir1/dir2/b.txt
|
|
# dir1/dir2/dir3/c.txt
|
|
#
|
|
# Output is the file with the most parent directories:
|
|
#
|
|
# dir1/dir2/dir3/c.txt
|
|
|
|
print "-> find_deepest_file()\n" if $opt_v > 2;
|
|
|
|
my $deepest = undef;
|
|
my $max_subdir = -1;
|
|
foreach my $F (sort @filenames) {
|
|
my ($Vol, $Dir, $File) = File::Spec->splitpath($F);
|
|
my @x = File::Spec->splitdir( $Dir );
|
|
pop @x unless $x[$#x]; # last entry usually null, remove it
|
|
if (scalar @x > $max_subdir) {
|
|
$deepest = $F;
|
|
$max_subdir = scalar @x;
|
|
}
|
|
}
|
|
|
|
print "<- find_deepest_file()\n" if $opt_v > 2;
|
|
return $deepest;
|
|
|
|
} # 1}}}
|
|
sub find_uncommon_parent_dir { # {{{1
|
|
my ($file_L, $file_R) = @_;
|
|
#
|
|
# example:
|
|
#
|
|
# file_L = "perl-5.16.1/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
|
|
# file_R = "/tmp/8VxQG0OLbp/perl-5.16.3/cpan/CPANPLUS/lib/CPANPLUS/Internals/Source/SQLite/Tie.pm"
|
|
#
|
|
# then return
|
|
#
|
|
# "perl-5.16.1",
|
|
# "/tmp/8VxQG0OLbp/perl-5.16.3",
|
|
|
|
my ($Vol_L, $Dir_L, $File_L) = File::Spec->splitpath($file_L);
|
|
my @x_L = File::Spec->splitdir( $Dir_L );
|
|
my ($Vol_R, $Dir_R, $File_R) = File::Spec->splitpath($file_R);
|
|
my @x_R = File::Spec->splitdir( $Dir_R );
|
|
|
|
my @common = ();
|
|
|
|
# work backwards
|
|
while ($x_L[$#x_L] eq $x_R[$#x_R]) {
|
|
push @common, $x_L[$#x_L];
|
|
pop @x_L;
|
|
pop @x_R;
|
|
}
|
|
my $success = scalar @common;
|
|
|
|
my $dirs_L = File::Spec->catdir( @x_L );
|
|
my $dirs_R = File::Spec->catdir( @x_R );
|
|
my $lead_L = File::Spec->catpath( $Vol_L, $dirs_L, "" );
|
|
my $lead_R = File::Spec->catpath( $Vol_R, $dirs_R, "" );
|
|
|
|
return $lead_L, $lead_R, $success;
|
|
|
|
} # 1}}}
|
|
sub get_leading_dirs { # {{{1
|
|
my ($rh_file_list_L, $rh_file_list_R) = @_;
|
|
# find uniquely named files in both sets to help determine the
|
|
# leading directory positions
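    # Illustrative example (hypothetical paths): if Tie.pm occurs exactly once
    # on each side,
    #     L:  v1.0/lib/App/Tie.pm
    #     R:  /tmp/x/v1.1/lib/App/Tie.pm
    # then matching path elements from the right suggests dropping roughly
    # "v1.0/" on the left and "/tmp/x/v1.1/" on the right.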
|
|
my %unique_filename = ();
|
|
my %basename_L = ();
|
|
my %basename_R = ();
|
|
foreach my $f (keys %{$rh_file_list_L}) {
|
|
my $bn = basename($f);
|
|
$basename_L{ $bn }{'count'} += 1;
|
|
$basename_L{ $bn }{'fullpath'} = $f;
|
|
}
|
|
foreach my $f (keys %{$rh_file_list_R}) {
|
|
my $bn = basename($f);
|
|
$basename_R{ $bn }{'count'} += 1;
|
|
$basename_R{ $bn }{'fullpath'} = $f;
|
|
}
|
|
foreach my $f (keys %basename_L) {
|
|
next unless $basename_L{$f}{'count'} == 1;
|
|
next unless defined $basename_R{$f} and $basename_R{$f}{'count'} == 1;
|
|
$unique_filename{$f}{'L'} = $basename_L{ $f }{'fullpath'};
|
|
$unique_filename{$f}{'R'} = $basename_R{ $f }{'fullpath'};
|
|
}
|
|
return undef, undef, 0 unless %unique_filename;
|
|
|
|
####my %candidate_leading_dir_L = ();
|
|
####my %candidate_leading_dir_R = ();
|
|
my ($L_drop, $R_drop) = (undef, undef);
|
|
foreach my $f (keys %unique_filename) {
|
|
my $fL = $unique_filename{ $f }{'L'};
|
|
my $fR = $unique_filename{ $f }{'R'};
|
|
|
|
my @DL = File::Spec->splitdir($fL);
|
|
my @DR = File::Spec->splitdir($fR);
|
|
#printf "%-36s -> %-36s\n", $fL, $fR;
|
|
#print Dumper(@DL, @DR);
|
|
# find the most number of common directories between L and R
|
|
if (!defined $L_drop) {
|
|
$L_drop = dirname $fL;
|
|
}
|
|
if (!defined $R_drop) {
|
|
$R_drop = dirname $fR;
|
|
}
|
|
my $n_path_elements_L = scalar @DL;
|
|
my $n_path_elements_R = scalar @DR;
|
|
my $n_path_elem = $n_path_elements_L < $n_path_elements_R ?
|
|
$n_path_elements_L : $n_path_elements_R;
|
|
my ($n_L_drop_this_pair, $n_R_drop_this_pair) = (0, 0);
|
|
for (my $i = 0; $i < $n_path_elem; $i++) {
|
|
last if $DL[ $#DL - $i] ne $DR[ $#DR - $i];
|
|
++$n_L_drop_this_pair;
|
|
++$n_R_drop_this_pair;
|
|
}
|
|
my $L_common = File::Spec->catdir( @DL[0..($#DL-$n_L_drop_this_pair)] );
|
|
my $R_common = File::Spec->catdir( @DR[0..($#DR-$n_R_drop_this_pair)] );
|
|
#print "L_common=$L_common\n";
|
|
#print "R_common=$R_common\n";
|
|
$L_drop = $L_common if length $L_common < length $L_drop;
|
|
$R_drop = $R_common if length $R_common < length $R_drop;
|
|
|
|
$L_drop = $L_drop . "/" if $L_drop;
|
|
$R_drop = $R_drop . "/" if $R_drop;
|
|
########my $ptr_L = length($fL) - 1;
|
|
########my $ptr_R = length($fR) - 1;
|
|
########my @aL = split '', $fL;
|
|
########my @aR = split '', $fR;
|
|
########while ($ptr_L >= 0 and $ptr_R >= 0) {
|
|
######## last if $aL[$ptr_L] ne $aR[$ptr_R];
|
|
######## --$ptr_L;
|
|
######## --$ptr_R;
|
|
########}
|
|
########my $leading_dir_L = "";
|
|
######## $leading_dir_L = substr($fL, 0, $ptr_L+1) if $ptr_L >= 0;
|
|
########my $leading_dir_R = "";
|
|
######## $leading_dir_R = substr($fR, 0, $ptr_R+1) if $ptr_R >= 0;
|
|
########++$candidate_leading_dir_L{$leading_dir_L};
|
|
########++$candidate_leading_dir_R{$leading_dir_R};
|
|
}
|
|
#use Data::Dumper::Simple;
|
|
# at this point path separator on Windows is already /
|
|
|
|
while ($L_drop =~ m{//}) {
|
|
$L_drop =~ s{//}{/}g;
|
|
}
|
|
while ($R_drop =~ m{//}) {
|
|
$R_drop =~ s{//}{/}g;
|
|
}
|
|
|
|
#print "L_drop=$L_drop\n";
|
|
#print "R_drop=$R_drop\n";
|
|
return $L_drop, $R_drop, 1;
|
|
####my $best_L = (sort {
|
|
#### $candidate_leading_dir_L{$b} <=>
|
|
#### $candidate_leading_dir_L{$a}} keys %candidate_leading_dir_L)[0];
|
|
####my $best_R = (sort {
|
|
#### $candidate_leading_dir_R{$b} <=>
|
|
#### $candidate_leading_dir_R{$a}} keys %candidate_leading_dir_R)[0];
|
|
####return $best_L, $best_R, 1;
|
|
} # 1}}}
|
|
sub align_by_pairs { # {{{1
|
|
my ($rh_file_list_L , # in
|
|
$rh_file_list_R , # in
|
|
$ra_added , # out
|
|
$ra_removed , # out
|
|
$ra_compare_list , # out
|
|
) = @_;
|
|
print "-> align_by_pairs()\n" if $opt_v > 2;
|
|
@{$ra_compare_list} = ();
|
|
|
|
my @files_L = sort keys %{$rh_file_list_L};
|
|
my @files_R = sort keys %{$rh_file_list_R};
|
|
return () unless @files_L or @files_R; # at least one must have stuff
|
|
if ($ON_WINDOWS) {
|
|
foreach (@files_L) { $_ =~ s{\\}{/}g; }
|
|
foreach (@files_R) { $_ =~ s{\\}{/}g; }
|
|
if ($opt_ignore_case_ext) {
|
|
foreach (@files_L) { $_ = lc $_; }
|
|
foreach (@files_R) { $_ = lc $_; }
|
|
}
|
|
}
|
|
|
|
if ( @files_L and !@files_R) {
|
|
# left side has stuff, right side is empty; everything deleted
|
|
@{$ra_added } = ();
|
|
@{$ra_removed } = @files_L;
|
|
@{$ra_compare_list} = ();
|
|
return;
|
|
} elsif (!@files_L and @files_R) {
|
|
# left side is empty, right side has stuff; everything added
|
|
@{$ra_added } = @files_R;
|
|
@{$ra_removed } = ();
|
|
@{$ra_compare_list} = ();
|
|
return;
|
|
} elsif (scalar(@files_L) == 1 and scalar(@files_R) == 1) {
|
|
# Special case of comparing one file against another. In
|
|
# this case force the pair to be aligned with each other,
|
|
# otherwise the file naming logic will think one file
|
|
# was added and the other deleted.
|
|
@{$ra_added } = ();
|
|
@{$ra_removed } = ();
|
|
@{$ra_compare_list} = ( [$files_L[0], $files_R[0]] );
|
|
return;
|
|
}
|
|
#use Data::Dumper::Simple;
|
|
#print Dumper("align_by_pairs", %{$rh_file_list_L}, %{$rh_file_list_R},);
|
|
#die;
|
|
|
|
# The harder case: compare groups of files. This only works
|
|
# if the groups are in different directories so the first step
|
|
# is to strip the leading directory names from file lists to
|
|
# make it possible to align by file names.
|
|
my @files_L_minus_dir = undef;
|
|
my @files_R_minus_dir = undef;
|
|
|
|
my $deepest_file_L = find_deepest_file(@files_L);
|
|
my $deepest_file_R = find_deepest_file(@files_R);
|
|
#print "deepest L = [$deepest_file_L]\n";
|
|
#print "deepest R = [$deepest_file_R]\n";
|
|
my ($leading_dir_L, $leading_dir_R, $success) =
|
|
get_leading_dirs($rh_file_list_L, $rh_file_list_R);
|
|
#print "leading_dir_L=[$leading_dir_L]\n";
|
|
#print "leading_dir_R=[$leading_dir_R]\n";
|
|
#print "success =[$success]\n";
|
|
if ($success) {
|
|
@files_L_minus_dir = strip_leading_dir($leading_dir_L, @files_L);
|
|
@files_R_minus_dir = strip_leading_dir($leading_dir_R, @files_R);
|
|
} else {
|
|
# otherwise fall back to old strategy
|
|
@files_L_minus_dir = remove_leading_dir(@files_L);
|
|
@files_R_minus_dir = remove_leading_dir(@files_R);
|
|
}
|
|
|
|
# Keys of the stripped_X arrays are canonical file names;
|
|
# should overlap mostly. Keys in stripped_L but not in
|
|
# stripped_R are files that have been deleted. Keys in
|
|
# stripped_R but not in stripped_L have been added.
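    # Sketch with hypothetical data of the mappings built below:
    #     %stripped_L = ( 'src/a.c' => 'old/src/a.c',
    #                     'src/b.c' => 'old/src/b.c' );
    #     %stripped_R = ( 'src/a.c' => 'new/src/a.c',
    #                     'src/c.c' => 'new/src/c.c' );
    # gives one compare pair (old/src/a.c vs new/src/a.c), one removed file
    # (old/src/b.c), and one added file (new/src/c.c).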
|
|
my %stripped_L = ();
|
|
@stripped_L{ @files_L_minus_dir } = @files_L;
|
|
my %stripped_R = ();
|
|
@stripped_R{ @files_R_minus_dir } = @files_R;
|
|
|
|
my %common = ();
|
|
foreach my $f (keys %stripped_L) {
|
|
$common{$f} = 1 if defined $stripped_R{$f};
|
|
}
|
|
|
|
my %deleted = ();
|
|
foreach my $f (keys %stripped_L) {
|
|
$deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f};
|
|
}
|
|
|
|
my %added = ();
|
|
foreach my $f (keys %stripped_R) {
|
|
$added{$stripped_R{$f}} = $f unless defined $stripped_L{$f};
|
|
}
|
|
|
|
#use Data::Dumper::Simple;
|
|
#print Dumper("align_by_pairs", %stripped_L, %stripped_R);
|
|
#print Dumper("align_by_pairs", %common, %added, %deleted);
|
|
|
|
foreach my $f (sort keys %common) {
|
|
push @{$ra_compare_list}, [ $stripped_L{$f},
|
|
$stripped_R{$f} ];
|
|
}
|
|
@{$ra_added } = keys %added ;
|
|
@{$ra_removed } = keys %deleted;
|
|
|
|
print "<- align_by_pairs()\n" if $opt_v > 2;
|
|
return;
|
|
#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir);
|
|
#die;
|
|
} # 1}}}
|
|
sub html_header { # {{{1
|
|
my ($title , ) = @_;
|
|
|
|
print "-> html_header\n" if $opt_v > 2;
|
|
return
|
|
'<html>
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
|
<meta name="GENERATOR" content="cloc http://github.com/AlDanial/cloc">
|
|
' .
|
|
"
|
|
<!-- Created by $script v$VERSION -->
|
|
<title>$title</title>
|
|
" .
|
|
'
|
|
<style TYPE="text/css">
|
|
<!--
|
|
body {
|
|
color: black;
|
|
background-color: white;
|
|
font-family: monospace
|
|
}
|
|
|
|
.whitespace {
|
|
background-color: gray;
|
|
}
|
|
|
|
.comment {
|
|
color: gray;
|
|
font-style: italic;
|
|
}
|
|
|
|
.clinenum {
|
|
color: red;
|
|
}
|
|
|
|
.linenum {
|
|
color: green;
|
|
}
|
|
-->
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<pre><tt>
|
|
';
|
|
print "<- html_header\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub html_end { # {{{1
|
|
return
|
|
'</tt></pre>
|
|
</body>
|
|
</html>
|
|
';
|
|
} # 1}}}
|
|
sub die_unknown_lang { # {{{1
|
|
my ($lang, $option_name) = @_;
|
|
die "Unknown language '$lang' used with $option_name option. " .
|
|
"The command\n $script --show-lang\n" .
|
|
"will print all recognized languages. Language names are " .
|
|
"case sensitive.\n" ;
|
|
} # 1}}}
|
|
sub unicode_file { # {{{1
|
|
my $file = shift @_;
|
|
|
|
print "-> unicode_file($file)\n" if $opt_v > 2;
|
|
return 0 if (get_size($file) > 2_000_000);
|
|
# don't bother trying to test binary files bigger than 2 MB
|
|
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
warn "Unable to read $file; ignoring.\n";
|
|
return 0;
|
|
}
|
|
my @lines = <$IN>;
|
|
$IN->close;
|
|
|
|
if (unicode_to_ascii( join('', @lines) )) {
|
|
print "<- unicode_file()\n" if $opt_v > 2;
|
|
return 1;
|
|
} else {
|
|
print "<- unicode_file()\n" if $opt_v > 2;
|
|
return 0;
|
|
}
|
|
|
|
} # 1}}}
|
|
sub unicode_to_ascii { # {{{1
|
|
my $string = shift @_;
|
|
|
|
# A trivial attempt to convert UTF-16 little or big endian
|
|
# files into ASCII. These files exhibit the following byte
|
|
# sequence:
|
|
# byte 1: 255
|
|
# byte 2: 254
|
|
# byte 3: ord of ASCII character
|
|
# byte 4: 0
|
|
# byte 3+i: ord of ASCII character
|
|
# byte 4+i: 0
|
|
# or
|
|
# byte 1: 255
|
|
# byte 2: 254
|
|
# byte 3: 0
|
|
# byte 4: ord of ASCII character
|
|
# byte 3+i: 0
|
|
# byte 4+i: ord of ASCII character
|
|
#
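    # Example (illustration only): after the two byte-order-mark bytes, the
    # text "Hi" appears as either
    #     72 0  105 0     (ASCII codes in the even byte positions), or
    #     0 72  0 105     (ASCII codes in the odd byte positions)
    # and the scan below checks which column holds printable ASCII.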
|
|
print "-> unicode_to_ascii()\n" if $opt_v > 2;
|
|
|
|
my $length = length $string;
|
|
#print "length=$length\n";
|
|
return '' if $length <= 3;
|
|
my @unicode = split(//, $string);
|
|
|
|
# check the first 100 characters (= 200 bytes) for big or
|
|
# little endian UTF-16 encoding
|
|
my $max_peek = $length < 200 ? $length : 200;
|
|
my $max_for_pass = $length < 200 ? 0.9*$max_peek/2 : 90;
|
|
my @view_1 = ();
|
|
for (my $i = 2; $i < $max_peek; $i += 2) { push @view_1, $unicode[$i] }
|
|
my @view_2 = ();
|
|
for (my $i = 3; $i < $max_peek; $i += 2) { push @view_2, $unicode[$i] }
|
|
|
|
my $points_1 = 0;
|
|
foreach my $C (@view_1) {
|
|
++$points_1 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
|
|
or ord($C) == 10
|
|
or ord($C) == 9;
|
|
}
|
|
|
|
my $points_2 = 0;
|
|
foreach my $C (@view_2) {
|
|
++$points_2 if (32 <= ord($C) and ord($C) <= 127) or ord($C) == 13
|
|
or ord($C) == 10
|
|
or ord($C) == 9;
|
|
}
|
|
#print "points 1: $points_1\n";
|
|
#print "points 2: $points_2\n";
|
|
#print "max_peek : $max_peek\n";
|
|
#print "max_for_pass: $max_for_pass\n";
|
|
|
|
my $offset = undef;
|
|
if ($points_1 > $max_for_pass) { $offset = 2; }
|
|
elsif ($points_2 > $max_for_pass) { $offset = 3; }
|
|
else {
|
|
print "<- unicode_to_ascii() a p1=$points_1 p2=$points_2\n" if $opt_v > 2;
|
|
return '';
|
|
    } # neither big- nor little-endian UTF-16
|
|
|
|
my @ascii = ();
|
|
for (my $i = $offset; $i < $length; $i += 2) {
|
|
# some compound characters are made of HT (9), LF (10), or CR (13)
|
|
# True HT, LF, CR are followed by 00; only add those.
|
|
my $L = $unicode[$i];
|
|
if (ord($L) == 9 or ord($L) == 10 or ord($L) == 13) {
|
|
my $companion;
|
|
if ($points_1) {
|
|
last if $i+1 >= $length;
|
|
$companion = $unicode[$i+1];
|
|
} else {
|
|
$companion = $unicode[$i-1];
|
|
}
|
|
if (ord($companion) == 0) {
|
|
push @ascii, $L;
|
|
} else {
|
|
push @ascii, " "; # no clue what this letter is
|
|
}
|
|
} else {
|
|
push @ascii, $L;
|
|
}
|
|
}
|
|
print "<- unicode_to_ascii() b p1=$points_1 p2=$points_2\n" if $opt_v > 2;
|
|
return join("", @ascii);
|
|
} # 1}}}
|
|
sub uncompress_archive_cmd { # {{{1
|
|
my ($archive_file, ) = @_;
|
|
|
|
# Wrap $archive_file in single or double quotes in the system
|
|
# commands below to avoid filename chicanery (including
|
|
# spaces in the names).
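    # For instance (illustration only, non-Windows case), the branches below
    # produce commands along the lines of
    #     foo.tar.gz  ->  gzip -dc 'foo.tar.gz' | tar xf -
    #     foo.zip     ->  unzip -qq -d . 'foo.zip'
    # provided the required external utilities are found.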
|
|
|
|
print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2;
|
|
my $extract_cmd = "";
|
|
my $missing = "";
|
|
if ($opt_extract_with) {
|
|
( $extract_cmd = $opt_extract_with ) =~ s/>FILE</$archive_file/g;
|
|
} elsif (basename($archive_file) eq "-" and !$ON_WINDOWS) {
|
|
$extract_cmd = "cat > -";
|
|
} elsif ($archive_file =~ /\.tar$/ and $ON_WINDOWS) {
|
|
$extract_cmd = "tar -xf \"$archive_file\"";
|
|
} elsif (($archive_file =~ /\.tar\.(gz|Z)$/ or
|
|
$archive_file =~ /\.tgz$/ ) and !$ON_WINDOWS) {
|
|
if (external_utility_exists("gzip --version")) {
|
|
if (external_utility_exists("tar --version")) {
|
|
$extract_cmd = "gzip -dc '$archive_file' | tar xf -";
|
|
} else {
|
|
$missing = "tar";
|
|
}
|
|
} else {
|
|
$missing = "gzip";
|
|
}
|
|
} elsif ($archive_file =~ /\.tar\.bz2$/ and !$ON_WINDOWS) {
|
|
if (external_utility_exists("bzip2 --help")) {
|
|
if (external_utility_exists("tar --version")) {
|
|
$extract_cmd = "bzip2 -dc '$archive_file' | tar xf -";
|
|
} else {
|
|
$missing = "tar";
|
|
}
|
|
} else {
|
|
$missing = "bzip2";
|
|
}
|
|
} elsif ($archive_file =~ /\.tar\.xz$/ and !$ON_WINDOWS) {
|
|
if (external_utility_exists("unxz --version")) {
|
|
if (external_utility_exists("tar --version")) {
|
|
$extract_cmd = "unxz -dc '$archive_file' | tar xf -";
|
|
} else {
|
|
$missing = "tar";
|
|
}
|
|
} else {
|
|
$missing = "bzip2";
|
|
}
|
|
} elsif ($archive_file =~ /\.tar$/ and !$ON_WINDOWS) {
|
|
$extract_cmd = "tar xf '$archive_file'";
|
|
} elsif ($archive_file =~ /\.src\.rpm$/i and !$ON_WINDOWS) {
|
|
if (external_utility_exists("cpio --version")) {
|
|
if (external_utility_exists("rpm2cpio")) {
|
|
$extract_cmd = "rpm2cpio '$archive_file' | cpio -i";
|
|
} else {
|
|
$missing = "rpm2cpio";
|
|
}
|
|
} else {
|
|
$missing = "bzip2";
|
|
}
|
|
} elsif ($archive_file =~ /\.(whl|zip)$/i and !$ON_WINDOWS) {
|
|
if (external_utility_exists("unzip")) {
|
|
$extract_cmd = "unzip -qq -d . '$archive_file'";
|
|
} else {
|
|
$missing = "unzip";
|
|
}
|
|
} elsif ($archive_file =~ /\.deb$/i and !$ON_WINDOWS) {
|
|
# only useful if the .deb contains source code--most
|
|
# .deb files just have compiled executables
|
|
if (external_utility_exists("dpkg-deb")) {
|
|
$extract_cmd = "dpkg-deb -x '$archive_file' .";
|
|
} else {
|
|
$missing = "dpkg-deb";
|
|
}
|
|
} elsif ($ON_WINDOWS and $archive_file =~ /\.(whl|zip)$/i) {
|
|
# use unzip on Windows (comes with git-for-Windows)
|
|
if (external_utility_exists("unzip")) {
|
|
$extract_cmd = "unzip -qq -d . \"$archive_file\" ";
|
|
} else {
|
|
$missing = "unzip";
|
|
}
|
|
}
|
|
print "<- uncompress_archive_cmd\n" if $opt_v > 2;
|
|
if ($missing) {
|
|
die "Unable to expand $archive_file because external\n",
|
|
"utility '$missing' is not available.\n",
|
|
"Another possibility is to use the --extract-with option.\n";
|
|
} else {
|
|
return $extract_cmd;
|
|
}
|
|
}
|
|
# 1}}}
|
|
sub read_list_file { # {{{1
|
|
my ($file, ) = @_;
|
|
# reads filenames from a STDIN pipe if $file == "-"
|
|
|
|
print "-> read_list_file($file)\n" if $opt_v > 2;
|
|
my @entry = ();
|
|
|
|
if ($file eq "-") {
|
|
# read from a STDIN pipe
|
|
my $IN;
|
|
open($IN, $file);
|
|
if (!defined $IN) {
|
|
warn "Unable to read $file; ignoring.\n";
|
|
return ();
|
|
}
|
|
while (<$IN>) {
|
|
next if /^\s*$/ or /^\s*#/; # skip empty or commented lines
|
|
s/\cM$//; # DOS to Unix
|
|
chomp;
|
|
push @entry, $_;
|
|
}
|
|
$IN->close;
|
|
} else {
|
|
# read from an actual file
|
|
foreach my $line (read_file($file)) {
|
|
next if $line =~ /^\s*$/ or $line =~ /^\s*#/;
|
|
$line =~ s/\cM$//; # DOS to Unix
|
|
chomp $line;
|
|
push @entry, $line;
|
|
}
|
|
}
|
|
|
|
print "<- read_list_file\n" if $opt_v > 2;
|
|
return @entry;
|
|
}
|
|
# 1}}}
|
|
sub external_utility_exists { # {{{1
|
|
my $exe = shift @_;
|
|
|
|
my $success = 0;
|
|
if ($ON_WINDOWS) {
|
|
$success = 1 unless system $exe . ' > nul';
|
|
} else {
|
|
$success = 1 unless system $exe . ' >/dev/null 2>&1';
|
|
if (!$success) {
|
|
$success = 1 unless system "which" . " $exe" . ' >/dev/null 2>&1';
|
|
}
|
|
}
|
|
|
|
return $success;
|
|
} # 1}}}
|
|
sub write_xsl_file { # {{{1
|
|
print "-> write_xsl_file\n" if $opt_v > 2;
|
|
my $XSL = # <style> </style> {{{2
|
|
'<?xml version="1.0" encoding="UTF-8"?>
|
|
<!-- XSL file by Paul Schwann, January 2009.
|
|
Fixes for by-file and by-file-by-lang by d_uragan, November 2010.
|
|
-->
|
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
<xsl:output method="html"/>
|
|
<xsl:template match="/">
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
<head>
|
|
<title>CLOC Results</title>
|
|
</head>
|
|
<style type="text/css">
|
|
table {
|
|
table-layout: auto;
|
|
border-collapse: collapse;
|
|
empty-cells: show;
|
|
}
|
|
td, th {
|
|
padding: 4px;
|
|
}
|
|
th {
|
|
background-color: #CCCCCC;
|
|
}
|
|
td {
|
|
text-align: center;
|
|
}
|
|
table, td, tr, th {
|
|
border: thin solid #999999;
|
|
}
|
|
</style>
|
|
<body>
|
|
<h3><xsl:value-of select="results/header"/></h3>
|
|
';
|
|
# 2}}}
|
|
|
|
if ($opt_by_file) {
|
|
$XSL .= # <table> </table>{{{2
|
|
' <table>
|
|
<thead>
|
|
<tr>
|
|
<th>File</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
<th>Language</th>
|
|
';
|
|
$XSL .=
|
|
' <th>3<sup>rd</sup> Generation Equivalent</th>
|
|
<th>Scale</th>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="results/files/file">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
<td><xsl:value-of select="@language"/></td>
|
|
';
|
|
$XSL .=
|
|
' <td><xsl:value-of select="@factor"/></td>
|
|
<td><xsl:value-of select="@scaled"/></td>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</xsl:for-each>
|
|
<tr>
|
|
<th>Total</th>
|
|
<th><xsl:value-of select="results/files/total/@blank"/></th>
|
|
<th><xsl:value-of select="results/files/total/@comment"/></th>
|
|
<th><xsl:value-of select="results/files/total/@code"/></th>
|
|
<th><xsl:value-of select="results/files/total/@language"/></th>
|
|
';
|
|
$XSL .=
|
|
' <th><xsl:value-of select="results/files/total/@factor"/></th>
|
|
<th><xsl:value-of select="results/files/total/@scaled"/></th>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</tbody>
|
|
</table>
|
|
<br/>
|
|
';
|
|
# 2}}}
|
|
}
|
|
|
|
if (!$opt_by_file or $opt_by_file_by_lang) {
|
|
$XSL .= # <table> </table> {{{2
|
|
' <table>
|
|
<thead>
|
|
<tr>
|
|
<th>Language</th>
|
|
<th>Files</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
';
|
|
$XSL .=
|
|
' <th>Scale</th>
|
|
<th>3<sup>rd</sup> Generation Equivalent</th>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="results/languages/language">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@files_count"/></td>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
';
|
|
$XSL .=
|
|
' <td><xsl:value-of select="@factor"/></td>
|
|
<td><xsl:value-of select="@scaled"/></td>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</xsl:for-each>
|
|
<tr>
|
|
<th>Total</th>
|
|
<th><xsl:value-of select="results/languages/total/@sum_files"/></th>
|
|
<th><xsl:value-of select="results/languages/total/@blank"/></th>
|
|
<th><xsl:value-of select="results/languages/total/@comment"/></th>
|
|
<th><xsl:value-of select="results/languages/total/@code"/></th>
|
|
';
|
|
$XSL .=
|
|
' <th><xsl:value-of select="results/languages/total/@factor"/></th>
|
|
<th><xsl:value-of select="results/languages/total/@scaled"/></th>
|
|
' if $opt_3;
|
|
$XSL .=
|
|
' </tr>
|
|
</tbody>
|
|
</table>
|
|
';
|
|
# 2}}}
|
|
}
|
|
|
|
$XSL.= <<'EO_XSL'; # {{{2
|
|
</body>
|
|
</html>
|
|
</xsl:template>
|
|
</xsl:stylesheet>
|
|
|
|
EO_XSL
|
|
# 2}}}
|
|
|
|
my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
<!-- XSL file by Blazej Kroll, November 2010 -->
|
|
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
|
<xsl:output method="html"/>
|
|
<xsl:template match="/">
|
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
|
<head>
|
|
<title>CLOC Results</title>
|
|
</head>
|
|
<style type="text/css">
|
|
table {
|
|
table-layout: auto;
|
|
border-collapse: collapse;
|
|
empty-cells: show;
|
|
margin: 1em;
|
|
}
|
|
td, th {
|
|
padding: 4px;
|
|
}
|
|
th {
|
|
background-color: #CCCCCC;
|
|
}
|
|
td {
|
|
text-align: center;
|
|
}
|
|
table, td, tr, th {
|
|
border: thin solid #999999;
|
|
}
|
|
</style>
|
|
<body>
|
|
<h3><xsl:value-of select="results/header"/></h3>
|
|
EO_DIFF_XSL
|
|
# 2}}}
|
|
|
|
if ($opt_by_file) {
|
|
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="4">Same</th>
|
|
</tr>
|
|
<tr>
|
|
<th>File</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/same/file">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="4">Modified</th>
|
|
</tr>
|
|
<tr>
|
|
<th>File</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/modified/file">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="4">Added</th>
|
|
</tr>
|
|
<tr>
|
|
<th>File</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/added/file">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="4">Removed</th>
|
|
</tr>
|
|
<tr>
|
|
<th>File</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/removed/file">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
EO_DIFF_XSL
|
|
# 2}}}
|
|
}
|
|
|
|
if (!$opt_by_file or $opt_by_file_by_lang) {
|
|
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="5">Same</th>
|
|
</tr>
|
|
<tr>
|
|
<th>Language</th>
|
|
<th>Files</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/same/language">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@files_count"/></td>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="5">Modified</th>
|
|
</tr>
|
|
<tr>
|
|
<th>Language</th>
|
|
<th>Files</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/modified/language">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@files_count"/></td>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="5">Added</th>
|
|
</tr>
|
|
<tr>
|
|
<th>Language</th>
|
|
<th>Files</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/added/language">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@files_count"/></td>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
|
|
<table>
|
|
<thead>
|
|
<tr><th colspan="5">Removed</th>
|
|
</tr>
|
|
<tr>
|
|
<th>Language</th>
|
|
<th>Files</th>
|
|
<th>Blank</th>
|
|
<th>Comment</th>
|
|
<th>Code</th>
|
|
</tr>
|
|
</thead>
|
|
<tbody>
|
|
<xsl:for-each select="diff_results/removed/language">
|
|
<tr>
|
|
<th><xsl:value-of select="@name"/></th>
|
|
<td><xsl:value-of select="@files_count"/></td>
|
|
<td><xsl:value-of select="@blank"/></td>
|
|
<td><xsl:value-of select="@comment"/></td>
|
|
<td><xsl:value-of select="@code"/></td>
|
|
</tr>
|
|
</xsl:for-each>
|
|
</tbody>
|
|
</table>
|
|
EO_DIFF_XSL
|
|
# 2}}}
|
|
|
|
}
|
|
|
|
$XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2
|
|
</body>
|
|
</html>
|
|
</xsl:template>
|
|
</xsl:stylesheet>
|
|
EO_DIFF_XSL
|
|
# 2}}}
|
|
if ($opt_diff) {
|
|
write_file($CLOC_XSL, {}, ( $XSL_DIFF ) );
|
|
} else {
|
|
write_file($CLOC_XSL, {}, ( $XSL ) );
|
|
}
|
|
print "<- write_xsl_file\n" if $opt_v > 2;
|
|
} # 1}}}
|
|
sub normalize_file_names { # {{{1
|
|
print "-> normalize_file_names\n" if $opt_v > 2;
|
|
my (@files, ) = @_;
|
|
|
|
# Returns a hash of file names reduced to a canonical form
|
|
# (fully qualified file names, all path separators changed to /,
|
|
# Windows file names lowercased). Hash values are the original
|
|
# file name.
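    # Sketch of the mapping (hypothetical input, non-Windows case):
    #     normalize_file_names('./src/Main.c')
    # returns
    #     ( "$cwd/src/Main.c" => './src/Main.c' )
    # On Windows the key would additionally be lowercased and use / separators.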
|
|
|
|
my %normalized = ();
|
|
foreach my $F (@files) {
|
|
my $F_norm = $F;
|
|
if ($ON_WINDOWS) {
|
|
$F_norm = lc $F_norm; # for case insensitive file name comparisons
|
|
$F_norm =~ s{\\}{/}g; # Windows directory separators to Unix
|
|
$F_norm =~ s{^\./}{}g; # remove leading ./
|
|
if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) {
|
|
# looks like a relative path; prefix with cwd
|
|
$F_norm = lc "$cwd/$F_norm";
|
|
}
|
|
} else {
|
|
$F_norm =~ s{^\./}{}g; # remove leading ./
|
|
if ($F_norm !~ m{^/}) {
|
|
# looks like a relative path; prefix with cwd
|
|
$F_norm = "$cwd/$F_norm";
|
|
}
|
|
}
|
|
# Remove trailing / so it does not interfere with further regex code
|
|
# that does not expect it
|
|
$F_norm =~ s{/+$}{};
|
|
$normalized{ $F_norm } = $F;
|
|
}
|
|
print "<- normalize_file_names\n" if $opt_v > 2;
|
|
return %normalized;
|
|
} # 1}}}
|
|
sub combine_diffs { # {{{1
|
|
# subroutine by Andy (awalshe@sf.net)
|
|
# https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625
|
|
my ($ra_files) = @_;
|
|
print "-> combine_diffs\n" if $opt_v > 2;
|
|
|
|
my $res = "$URL v $VERSION\n";
|
|
my $dl = '-';
|
|
my $width = 79;
|
|
# columns are in this order
|
|
my @cols = ('files', 'blank', 'comment', 'code');
|
|
my %HoH = ();
|
|
|
|
foreach my $file (@{$ra_files}) {
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
warn "Unable to read $file; ignoring.\n";
|
|
next;
|
|
}
|
|
|
|
my $sec;
|
|
while (<$IN>) {
|
|
chomp;
|
|
s/\cM$//;
|
|
next if /^(http|Language|-----)/;
|
|
if (/^[A-Za-z0-9]+/) { # section title
|
|
$sec = $_;
|
|
chomp($sec);
|
|
$HoH{$sec} = () if ! exists $HoH{$sec};
|
|
next;
|
|
}
|
|
|
|
if (/^\s(same|modified|added|removed)/) { # calculated totals row
|
|
my @ar = grep { $_ ne '' } split(/ /, $_);
|
|
chomp(@ar);
|
|
my $ttl = shift @ar;
|
|
my $i = 0;
|
|
foreach(@ar) {
|
|
my $t = "${ttl}${dl}${cols[$i]}";
|
|
$HoH{$sec}{$t} = 0 if ! exists $HoH{$sec}{$t};
|
|
$HoH{$sec}{$t} += $_;
|
|
$i++;
|
|
}
|
|
}
|
|
}
|
|
$IN->close;
|
|
}
|
|
|
|
# rows are in this order
|
|
my @rows = ('same', 'modified', 'added', 'removed');
|
|
|
|
$res .= sprintf("%s\n", "-" x $width);
|
|
$res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language',
|
|
$cols[0], $cols[1], $cols[2], $cols[3]);
|
|
$res .= sprintf("%s\n", "-" x $width);
|
|
|
|
# no inputs? %HoH will be empty
|
|
return $res unless %HoH;
|
|
|
|
for my $sec ( keys %HoH ) {
|
|
next if $sec =~ /SUM:/;
|
|
next unless defined $HoH{$sec}; # eg, the header line
|
|
$res .= "$sec\n";
|
|
foreach (@rows) {
|
|
$res .= sprintf(" %-18s %14s %14s %14s %14s\n",
|
|
$_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[1]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[2]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[3]}"});
|
|
}
|
|
}
|
|
$res .= sprintf("%s\n", "-" x $width);
|
|
my $sec = 'SUM:';
|
|
$res .= "$sec\n";
|
|
foreach (@rows) {
|
|
$res .= sprintf(" %-18s %14s %14s %14s %14s\n",
|
|
$_, $HoH{$sec}{"${_}${dl}${cols[0]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[1]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[2]}"},
|
|
$HoH{$sec}{"${_}${dl}${cols[3]}"});
|
|
}
|
|
$res .= sprintf("%s\n", "-" x $width);
|
|
|
|
print "<- combine_diffs\n" if $opt_v > 2;
|
|
return $res;
|
|
} # 1}}}
|
|
sub combine_csv_diffs { # {{{1
|
|
my ($delimiter, $ra_files) = @_;
|
|
print "-> combine_csv_diffs\n" if $opt_v > 2;
|
|
|
|
    my %sum = (); # sum{ language } = array of 16 values
|
|
foreach my $file (@{$ra_files}) {
|
|
my $IN = open_file('<', $file, 1);
|
|
if (!defined $IN) {
|
|
warn "Unable to read $file; ignoring.\n";
|
|
next;
|
|
}
|
|
|
|
my $sec;
|
|
while (<$IN>) {
|
|
next if /^Language${delimiter}\s==\sfiles${delimiter}/;
|
|
chomp;
|
|
my @words = split(/$delimiter/);
|
|
my $n_col = scalar(@words);
|
|
if ($n_col != 18) {
|
|
warn "combine_csv_diffs(): Parse failure line $. of $file\n";
|
|
warn "Expected 18 columns, got $n_col\n";
|
|
die;
|
|
}
|
|
my $Lang = $words[0];
|
|
my @count = map { int($_) } @words[1..16];
|
|
if (defined $sum{$Lang}) {
|
|
for (my $i = 0; $i < 16; $i++) {
|
|
$sum{$Lang}[$i] += $count[$i];
|
|
}
|
|
} else {
|
|
@{$sum{$Lang}} = @count;
|
|
}
|
|
}
|
|
$IN->close;
|
|
}
|
|
|
|
my @header = ("Language", "== files", "!= files", "+ files", "- files",
|
|
"== blank", "!= blank", "+ blank", "- blank", "== comment",
|
|
"!= comment", "+ comment", "- comment", "== code",
|
|
"!= code", "+ code", "- code", "$URL v $VERSION" );
|
|
|
|
my $res = join("$delimiter ", @header) . "$delimiter\n";
|
|
foreach my $Lang (sort keys %sum) {
|
|
$res .= $Lang . "$delimiter ";
|
|
for (my $i = 0; $i < 16; $i++) {
|
|
$res .= $sum{$Lang}[$i] . "$delimiter ";
|
|
}
|
|
$res .= "\n";
|
|
}
|
|
|
|
print "<- combine_csv_diffs\n" if $opt_v > 2;
|
|
return $res;
|
|
} # 1}}}
|
|
sub get_time { # {{{1
|
|
if ($HAVE_Time_HiRes) {
|
|
return Time::HiRes::time();
|
|
} else {
|
|
return time();
|
|
}
|
|
} # 1}}}
|
|
sub really_is_D { # {{{1
|
|
# Ref bug 131, files ending with .d could be init.d scripts
|
|
# instead of D language source files.
|
|
my ($file , # in
|
|
$rh_Err , # in hash of error codes
|
|
$raa_errors , # out
|
|
) = @_;
|
|
print "-> really_is_D($file)\n" if $opt_v > 2;
|
|
my ($possible_script, $L) = peek_at_first_line($file, $rh_Err, $raa_errors);
|
|
|
|
print "<- really_is_D($file)\n" if $opt_v > 2;
|
|
return $possible_script; # null string if D, otherwise a language
|
|
} # 1}}}
|
|
sub no_autogen_files { # {{{1
|
|
# ref https://github.com/AlDanial/cloc/issues/151
|
|
my ($print,) = @_;
|
|
print "-> no_autogen($print)\n" if $opt_v > 2;
|
|
|
|
    # These are sometimes created manually?
|
|
# acinclude.m4
|
|
# configure.ac
|
|
# Makefile.am
|
|
|
|
my @files = qw (
|
|
aclocal.m4
|
|
announce-gen
|
|
autogen.sh
|
|
bootstrap
|
|
compile
|
|
config.guess
|
|
config.h.in
|
|
config.rpath
|
|
config.status
|
|
config.sub
|
|
configure
|
|
configure.in
|
|
depcomp
|
|
gendocs.sh
|
|
gitlog-to-changelog
|
|
git-version-gen
|
|
gnupload
|
|
gnu-web-doc-update
|
|
install-sh
|
|
libtool
|
|
libtool.m4
|
|
link-warning.h
|
|
ltmain.sh
|
|
lt~obsolete.m4
|
|
ltoptions.m4
|
|
ltsugar.m4
|
|
ltversion.in
|
|
ltversion.m4
|
|
Makefile.in
|
|
mdate-sh
|
|
missing
|
|
mkinstalldirs
|
|
test-driver
|
|
texinfo.tex
|
|
update-copyright
|
|
useless-if-before-free
|
|
vc-list-files
|
|
ylwrap
|
|
);
|
|
|
|
if ($print) {
|
|
printf "cloc will ignore these %d files with --no-autogen:\n", scalar @files;
|
|
foreach my $F (@files) {
|
|
print " $F\n";
|
|
}
|
|
print "Additionally, Go files with '// Code generated by .* DO NOT EDIT.'\n";
|
|
print "on the first line are ignored.\n";
|
|
}
|
|
print "<- no_autogen()\n" if $opt_v > 2;
|
|
return @files;
|
|
} # 1}}}
|
|
sub load_from_config_file { # {{{1
|
|
# Supports all options except --config itself which would
|
|
# be pointless.
|
|
my ($config_file,
|
|
$rs_by_file ,
|
|
$rs_by_file_by_lang ,
|
|
$rs_categorized ,
|
|
$rs_counted ,
|
|
$rs_include_ext ,
|
|
$rs_include_lang ,
|
|
$rs_include_content ,
|
|
$rs_exclude_content ,
|
|
$rs_exclude_lang ,
|
|
$rs_exclude_dir ,
|
|
$rs_exclude_list_file ,
|
|
$rs_explain ,
|
|
$rs_extract_with ,
|
|
$rs_found ,
|
|
$rs_count_diff ,
|
|
$rs_diff_list_files ,
|
|
$rs_diff ,
|
|
$rs_diff_alignment ,
|
|
$rs_diff_timeout ,
|
|
$rs_timeout ,
|
|
$rs_html ,
|
|
$rs_ignored ,
|
|
$rs_quiet ,
|
|
$rs_force_lang_def ,
|
|
$rs_read_lang_def ,
|
|
$rs_show_ext ,
|
|
$rs_show_lang ,
|
|
$rs_progress_rate ,
|
|
$rs_print_filter_stages ,
|
|
$rs_report_file ,
|
|
$ra_script_lang ,
|
|
$rs_sdir ,
|
|
$rs_skip_uniqueness ,
|
|
$rs_strip_code ,
|
|
$rs_strip_comments ,
|
|
$rs_original_dir ,
|
|
$rs_sum_reports ,
|
|
$rs_hide_rate ,
|
|
$rs_processes ,
|
|
$rs_unicode ,
|
|
$rs_3 ,
|
|
$rs_v ,
|
|
$rs_vcs ,
|
|
$rs_version ,
|
|
$rs_write_lang_def ,
|
|
$rs_write_lang_def_incl_dup,
|
|
$rs_xml ,
|
|
$rs_xsl ,
|
|
$ra_force_lang ,
|
|
$rs_lang_no_ext ,
|
|
$rs_yaml ,
|
|
$rs_csv ,
|
|
$rs_csv_delimiter ,
|
|
$rs_json ,
|
|
$rs_md ,
|
|
$rs_fullpath ,
|
|
$rs_match_f ,
|
|
$ra_not_match_f ,
|
|
$rs_match_d ,
|
|
$ra_not_match_d ,
|
|
$rs_list_file ,
|
|
$rs_help ,
|
|
$rs_skip_win_hidden ,
|
|
$rs_read_binary_files ,
|
|
$rs_sql ,
|
|
$rs_sql_project ,
|
|
$rs_sql_append ,
|
|
$rs_sql_style ,
|
|
$rs_inline ,
|
|
$rs_exclude_ext ,
|
|
$rs_ignore_whitespace ,
|
|
$rs_ignore_case ,
|
|
$rs_ignore_case_ext ,
|
|
$rs_follow_links ,
|
|
$rs_autoconf ,
|
|
$rs_sum_one ,
|
|
$rs_by_percent ,
|
|
$rs_stdin_name ,
|
|
$rs_force_on_windows ,
|
|
$rs_force_on_unix ,
|
|
$rs_show_os ,
|
|
$rs_skip_archive ,
|
|
$rs_max_file_size ,
|
|
$rs_use_sloccount ,
|
|
$rs_no_autogen ,
|
|
$rs_force_git ,
|
|
$rs_strip_str_comments ,
|
|
$rs_file_encoding ,
|
|
$rs_docstring_as_code ,
|
|
$rs_stat ,
|
|
) = @_;
|
|
# look for runtime configuration file in
|
|
# $ENV{'HOME'}/.config/cloc/options.txt -> POSIX
|
|
    # $ENV{'APPDATA'} . 'cloc'                   -> Windows
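    # A hypothetical options.txt accepted by the loop below -- one option per
    # line, the leading "--" optional, blank lines and "#" comments skipped
    # (the option names are real cloc options, the values are only illustrative):
    #     --max-file-size=5
    #     exclude-dir=vendor
    #     json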
|
|
|
|
print "-> load_from_config_file($config_file)\n" if $opt_v and $opt_v > 2;
|
|
if (!is_file($config_file)) {
|
|
print "<- load_from_config_file() (no such file: $config_file)\n" if $opt_v and $opt_v > 2;
|
|
return;
|
|
} elsif (!can_read($config_file)) {
|
|
print "<- load_from_config_file() (unable to read $config_file)\n" if $opt_v and $opt_v > 2;
|
|
return;
|
|
}
|
|
print "Reading options from $config_file.\n" if defined $opt_v;
|
|
|
|
my $has_force_lang = @{$ra_force_lang};
|
|
my $has_script_lang = @{$ra_script_lang};
|
|
my @lines = read_file($config_file);
|
|
foreach (@lines) {
|
|
next if /^\s*$/ or /^\s*#/;
|
|
s/\s*--//;
|
|
s/^\s+//;
|
|
        if (!defined ${$rs_by_file_by_lang} and /^(by_file_by_lang|by-file-by-lang)/) { ${$rs_by_file_by_lang} = 1;
        } elsif (!defined ${$rs_by_file} and /^(by_file|by-file)/) { ${$rs_by_file} = 1;
|
|
} elsif (!defined ${$rs_categorized} and /^categorized(=|\s+)(.*?)$/) { ${$rs_categorized} = $2;
|
|
} elsif (!defined ${$rs_counted} and /^counted(=|\s+)(.*?)$/) { ${$rs_counted} = $2;
|
|
} elsif (!defined ${$rs_include_ext} and /^(?:include_ext|include-ext)(=|\s+)(.*?)$/) { ${$rs_include_ext} = $2;
|
|
} elsif (!defined ${$rs_include_lang} and /^(?:include_lang|include-lang)(=|\s+)(.*?)$/) { ${$rs_include_lang} = $2;
|
|
} elsif (!defined ${$rs_include_content} and /^(?:include_content|include-content)(=|\s+)(.*?)$/) { ${$rs_include_content} = $2;
|
|
} elsif (!defined ${$rs_exclude_content} and /^(?:exclude_content|exclude-content)(=|\s+)(.*?)$/) { ${$rs_exclude_content} = $2;
|
|
} elsif (!defined ${$rs_exclude_lang} and /^(?:exclude_lang|exclude-lang)(=|\s+)(.*?)$/) { ${$rs_exclude_lang} = $2;
|
|
} elsif (!defined ${$rs_exclude_dir} and /^(?:exclude_dir|exclude-dir)(=|\s+)(.*?)$/) { ${$rs_exclude_dir} = $2;
|
|
} elsif (!defined ${$rs_explain} and /^explain(=|\s+)(.*?)$/) { ${$rs_explain} = $2;
|
|
} elsif (!defined ${$rs_extract_with} and /^(?:extract_with|extract-with)(=|\s+)(.*?)$/) { ${$rs_extract_with} = $2;
|
|
} elsif (!defined ${$rs_found} and /^found(=|\s+)(.*?)$/) { ${$rs_found} = $2;
|
|
} elsif (!defined ${$rs_count_diff} and /^(count_and_diff|count-and-diff)/) { ${$rs_count_diff} = 1;
|
|
} elsif (!defined ${$rs_diff_list_files} and /^(diff_list_files|diff-list-files)/) { ${$rs_diff_list_files} = 1;
|
|
        } elsif (!defined ${$rs_diff_alignment} and /^(?:diff-alignment|diff_alignment)(=|\s+)(.*?)$/) { ${$rs_diff_alignment} = $2;
        } elsif (!defined ${$rs_diff_timeout} and /^(?:diff-timeout|diff_timeout)(=|\s+)(\d+)/) { ${$rs_diff_timeout} = $2;
        } elsif (!defined ${$rs_diff} and /^diff/) { ${$rs_diff} = 1;
        } elsif (!defined ${$rs_timeout} and /^timeout(=|\s+)(\d+)/) { ${$rs_timeout} = $2;
|
|
} elsif (!defined ${$rs_html} and /^html/) { ${$rs_html} = 1;
|
|
} elsif (!defined ${$rs_ignored} and /^ignored(=|\s+)(.*?)$/) { ${$rs_ignored} = $2;
|
|
} elsif (!defined ${$rs_quiet} and /^quiet/) { ${$rs_quiet} = 1;
|
|
} elsif (!defined ${$rs_force_lang_def} and /^(?:force_lang_def|force-lang-def)(=|\s+)(.*?)$/) { ${$rs_force_lang_def} = $2;
|
|
} elsif (!defined ${$rs_read_lang_def} and /^(?:read_lang_def|read-lang-def)(=|\s+)(.*?)$/) { ${$rs_read_lang_def} = $2;
|
|
} elsif (!defined ${$rs_progress_rate} and /^(?:progress_rate|progress-rate)(=|\s+)(\d+)/) { ${$rs_progress_rate} = $2;
|
|
} elsif (!defined ${$rs_print_filter_stages} and /^(print_filter_stages|print-filter-stages)/) { ${$rs_print_filter_stages}= 1;
|
|
} elsif (!defined ${$rs_report_file} and /^(?:report_file|report-file)(=|\s+)(.*?)$/) { ${$rs_report_file} = $2;
|
|
} elsif (!defined ${$rs_report_file} and /^out(=|\s+)(.*?)$/) { ${$rs_report_file} = $2;
|
|
} elsif (!defined ${$rs_sdir} and /^sdir(=|\s+)(.*?)$/) { ${$rs_sdir} = $2;
|
|
} elsif (!defined ${$rs_skip_uniqueness} and /^(skip_uniqueness|skip-uniqueness)/) { ${$rs_skip_uniqueness} = 1;
|
|
} elsif (!defined ${$rs_strip_code} and /^(?:strip_code|strip-code)(=|\s+)(.*?)$/) { ${$rs_strip_code} = $2;
|
|
} elsif (!defined ${$rs_strip_comments} and /^(?:strip_comments|strip-comments)(=|\s+)(.*?)$/) { ${$rs_strip_comments} = $2;
|
|
} elsif (!defined ${$rs_original_dir} and /^(original_dir|original-dir)/) { ${$rs_original_dir} = 1;
|
|
} elsif (!defined ${$rs_sum_reports} and /^(sum_reports|sum-reports)/) { ${$rs_sum_reports} = 1;
|
|
        } elsif (!defined ${$rs_hide_rate} and /^(hide_rate|hide-rate)/) { ${$rs_hide_rate} = 1;
|
|
} elsif (!defined ${$rs_processes} and /^processes(=|\s+)(\d+)/) { ${$rs_processes} = $2;
|
|
} elsif (!defined ${$rs_unicode} and /^unicode/) { ${$rs_unicode} = 1;
|
|
} elsif (!defined ${$rs_3} and /^3/) { ${$rs_3} = 1;
|
|
} elsif (!defined ${$rs_vcs} and /^vcs(=|\s+)(\S+)/) { ${$rs_vcs} = $2;
|
|
} elsif (!defined ${$rs_version} and /^version/) { ${$rs_version} = 1;
|
|
} elsif (!defined ${$rs_write_lang_def} and /^(?:write_lang_def|write-lang-def)(=|\s+)(.*?)$/) { ${$rs_write_lang_def} = $2;
|
|
} elsif (!defined ${$rs_write_lang_def_incl_dup} and /^(?:write_lang_def_incl_dup|write-lang-def-incl-dup)(=|\s+)(.*?)$/) { ${$rs_write_lang_def_incl_dup} = $2;
|
|
} elsif (!defined ${$rs_xml} and /^xml/) { ${$rs_xml} = 1;
|
|
} elsif (!defined ${$rs_xsl} and /^xsl(=|\s+)(.*?)$/) { ${$rs_xsl} = $2;
|
|
} elsif (!defined ${$rs_lang_no_ext} and /^(?:lang_no_ext|lang-no-ext)(=|\s+)(.*?)$/) { ${$rs_lang_no_ext} = $2;
|
|
} elsif (!defined ${$rs_yaml} and /^yaml/) { ${$rs_yaml} = 1;
|
|
        } elsif (!defined ${$rs_csv_delimiter} and /^(?:csv_delimiter|csv-delimiter)(=|\s+)(.*?)$/) { ${$rs_csv_delimiter} = $2;
        } elsif (!defined ${$rs_csv} and /^csv/) { ${$rs_csv} = 1;
|
|
} elsif (!defined ${$rs_json} and /^json/) { ${$rs_json} = 1;
|
|
} elsif (!defined ${$rs_md} and /^md/) { ${$rs_md} = 1;
|
|
} elsif (!defined ${$rs_fullpath} and /^fullpath/) { ${$rs_fullpath} = 1;
|
|
} elsif (!defined ${$rs_match_f} and /^(?:match_f|match-f)(=|\s+)(.*?)$/) { ${$rs_match_f} = $2;
|
|
} elsif (! @{$ra_not_match_f} and /^(?:not_match_f|not-match-f)(=|\s+)(.*?)$/) { push @{$ra_not_match_f} , $2;
|
|
} elsif (!defined ${$rs_match_d} and /^(?:match_d|match-d)(=|\s+)(.*?)$/) { ${$rs_match_d} = $2;
|
|
} elsif (! @{$ra_not_match_d} and /^(?:not_match_d|not-match-d)(=|\s+)(.*?)$/) { push @{$ra_not_match_d} , $2;
|
|
} elsif (!defined ${$rs_list_file} and /^(?:list_file|list-file)(=|\s+)(.*?)$/) { ${$rs_list_file} = $2;
|
|
} elsif (!defined ${$rs_help} and /^help/) { ${$rs_help} = 1;
|
|
} elsif (!defined ${$rs_skip_win_hidden} and /^(skip_win_hidden|skip-win-hidden)/) { ${$rs_skip_win_hidden} = 1;
|
|
} elsif (!defined ${$rs_read_binary_files} and /^(read_binary_files|read-binary-files)/) { ${$rs_read_binary_files} = 1;
|
|
} elsif (!defined ${$rs_sql} and /^sql(=|\s+)(.*?)$/) { ${$rs_sql} = $2;
|
|
} elsif (!defined ${$rs_sql_project} and /^(?:sql_project|sql-project)(=|\s+)(.*?)$/) { ${$rs_sql_project} = $2;
|
|
} elsif (!defined ${$rs_sql_append} and /^(sql_append|sql-append)/) { ${$rs_sql_append} = 1;
|
|
} elsif (!defined ${$rs_sql_style} and /^(?:sql_style|sql-style)(=|\s+)(.*?)$/) { ${$rs_sql_style} = $2;
|
|
} elsif (!defined ${$rs_inline} and /^inline/) { ${$rs_inline} = 1;
|
|
} elsif (!defined ${$rs_exclude_ext} and /^(?:exclude_ext|exclude-ext)(=|\s+)(.*?)$/) { ${$rs_exclude_ext} = $2;
|
|
} elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1;
|
|
} elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1;
|
|
} elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1;
|
|
} elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1;
|
|
} elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1;
|
|
} elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1;
|
|
} elsif (!defined ${$rs_by_percent} and /^(?:by_percent|by-percent)(=|\s+)(.*?)$/) { ${$rs_by_percent} = $2;
|
|
} elsif (!defined ${$rs_stdin_name} and /^(?:stdin_name|stdin-name)(=|\s+)(.*?)$/) { ${$rs_stdin_name} = $2;
|
|
} elsif (!defined ${$rs_force_on_windows} and /^windows/) { ${$rs_force_on_windows} = 1;
|
|
} elsif (!defined ${$rs_force_on_unix} and /^unix/) { ${$rs_force_on_unix} = 1;
|
|
} elsif (!defined ${$rs_show_os} and /^(show_os|show-os)/) { ${$rs_show_os} = 1;
|
|
} elsif (!defined ${$rs_skip_archive} and /^(?:skip_archive|skip-archive)(=|\s+)(.*?)$/) { ${$rs_skip_archive} = $2;
|
|
} elsif (!defined ${$rs_max_file_size} and /^(?:max_file_size|max-file-size)(=|\s+)(\d+)/) { ${$rs_max_file_size} = $2;
|
|
} elsif (!defined ${$rs_use_sloccount} and /^(use_sloccount|use-sloccount)/) { ${$rs_use_sloccount} = 1;
|
|
} elsif (!defined ${$rs_no_autogen} and /^(no_autogen|no-autogen)/) { ${$rs_no_autogen} = 1;
|
|
} elsif (!defined ${$rs_force_git} and /^git/) { ${$rs_force_git} = 1;
|
|
} elsif (!defined ${$rs_exclude_list_file} and /^(?:exclude_list_file|exclude-list-file)(=|\s+)(.*?)$/)
|
|
{ ${$rs_exclude_list_file} = $2;
|
|
} elsif (!defined ${$rs_v} and /^(verbose|v)((=|\s+)(\d+))?/) {
|
|
if (!defined $4) { ${$rs_v} = 0; }
|
|
else { ${$rs_v} = $4; }
|
|
} elsif (!$has_script_lang and /^(?:script_lang|script-lang)(=|\s+)(.*?)$/) {
|
|
push @{$ra_script_lang} , $2;
|
|
} elsif (!$has_force_lang and /^(?:force_lang|force-lang)(=|\s+)(.*?)$/) {
|
|
push @{$ra_force_lang} , $2;
|
|
} elsif (!defined ${$rs_show_ext} and /^(show_ext|show-ext)((=|\s+)(.*))?$/) {
|
|
if (!defined $4) { ${$rs_show_ext} = 0; }
|
|
else { ${$rs_show_ext} = $4; }
|
|
        } elsif (!defined ${$rs_show_lang} and /^(show_lang|show-lang)((=|\s+)(.*))?$/) {
|
|
if (!defined $4) { ${$rs_show_lang} = 0; }
|
|
else { ${$rs_show_lang} = $4; }
|
|
} elsif (!defined ${$rs_strip_str_comments} and /^(strip_str_comments|strip-str-comments)/) { ${$rs_strip_str_comments} = 1;
|
|
} elsif (!defined ${$rs_file_encoding} and /^(?:file_encoding|file-encoding)(=|\s+)(\S+)/) { ${$rs_file_encoding} = $2;
|
|
} elsif (!defined ${$rs_docstring_as_code} and /^(docstring_as_code|docstring-as-code)/) { ${$rs_docstring_as_code} = 1;
|
|
        } elsif (!defined ${$rs_stat} and /^stat/) { ${$rs_stat} = 1;
|
|
}
|
|
|
|
}
|
|
} # 1}}}
|
|
sub trick_pp_packer_encode { # {{{1
|
|
use Encode;
|
|
# PAR::Packer gives 'Unknown PerlIO layer "encoding"' unless it is
|
|
# forced into using this module.
|
|
my ($OUT, $JunkFile) = tempfile(UNLINK => 1); # delete on exit
|
|
open($OUT, "> :encoding(utf8)", $JunkFile);
|
|
close($OUT);
|
|
}
|
|
# 1}}}
|
|
sub really_is_smarty { # {{{1
|
|
    # Given filename, returns TRUE if its contents look like a Smarty template
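    # For example (hypothetical input), a template whose lines include
    #     {if $logged_in}
    #     {/if}
    # scores one point per line below, reaches 2, and is reported as Smarty.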
|
|
my ($filename, ) = @_;
|
|
|
|
print "-> really_is_smarty($filename)\n" if $opt_v > 2;
|
|
|
|
my @lines = read_file($filename);
|
|
|
|
my $points = 0;
|
|
foreach my $L (@lines) {
|
|
if (($L =~ /\{(if|include)\s/) or
|
|
($L =~ /\{\/if\}/) or
|
|
($L =~ /(\{\*|\*\})/) or
|
|
($L =~ /\{\$\w/)) {
|
|
++$points;
|
|
}
|
|
last if $points >= 2;
|
|
}
|
|
print "<- really_is_smarty(points=$points)\n" if $opt_v > 2;
|
|
return $points >= 2;
|
|
} # 1}}}
|
|
sub check_alternate_config_files { # {{{1
|
|
my ($list_file, $exclude_list_file, $read_lang_def,
|
|
$force_lang_def, $diff_list_file, ) = @_;
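    # Sketch of the intent (paths are hypothetical): when cloc is run with
    # --list-file=proj/files.txt and the directory proj/ also holds a readable
    # copy of the configuration file (eg proj/options.txt), that copy is
    # returned so it can be used instead of the default one.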
|
|
my $found_it = "";
|
|
foreach my $file ($list_file,
|
|
$exclude_list_file,
|
|
$read_lang_def,
|
|
$force_lang_def,
|
|
$diff_list_file ) {
|
|
next unless defined $file;
|
|
my $dir = dirname $file;
|
|
next unless can_read($dir) and is_dir($dir);
|
|
my $bn = basename $config_file;
|
|
if (can_read("$dir/$bn")) {
|
|
$found_it = "$dir/$bn";
|
|
print "Using configuration file $found_it\n" if $opt_v;
|
|
last;
|
|
}
|
|
}
|
|
return $found_it;
|
|
}
|
|
# 1}}}
|
|
sub write_null_results { # {{{
|
|
my ($json, $xml, $report_file,) = @_;
|
|
print "-> write_null_results\n" if $opt_v > 2;
|
|
if ((defined $json) or (defined $xml)) {
|
|
my $line = "";
|
|
if (defined $json) {
|
|
$line = "{}";
|
|
} else {
|
|
$line = '<?xml version="1.0" encoding="UTF-8"?><results/>';
|
|
}
|
|
if (defined $report_file) {
|
|
open OUT, ">$report_file" or die "Cannot write to $report_file $!\n";
|
|
print OUT "$line\n";
|
|
close OUT;
|
|
} else {
|
|
print "$line\n";
|
|
}
|
|
}
|
|
print "<- write_null_results\n" if $opt_v > 2;
|
|
} # }}}
|
|
sub glob2regex { # {{{
|
|
    # convert a simple glob pattern (with * and ** wildcards) to an anchored regex
|
|
my $globstr = shift;
|
|
my $re = $globstr;
|
|
$re =~ s{^["']}{};
|
|
$re =~ s{^\.\/}{};
|
|
$re =~ s{["']$}{};
|
|
$re =~ s{\.}{\\.}g;
|
|
$re =~ s{\*\*}{\cx}g; # ctrl x = .*?
|
|
$re =~ s{\*}{\cy}g; # ctrl y = [^/]*
|
|
$re =~ s{\cx}{.*?}g;
|
|
$re =~ s{\cy}{[^/]*}g;
|
|
return '^' . $re . '$';
|
|
} # }}}
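# Illustration of glob2regex() above on two hypothetical patterns (not used
# anywhere in the code):
#   glob2regex('*.py')        returns  '^[^/]*\.py$'
#   glob2regex('src/**/*.c')  returns  '^src/.*?/[^/]*\.c$'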
|
|
sub load_json { # {{{1
|
|
#
|
|
# Load a cloc-generated JSON string into %contents
|
|
# $contents{filename}{blank|comment|code|language} = value
|
|
# then print in a variety of formats.
|
|
#
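    # Shape of the input handled below, one key per line as cloc emits it
    # (the file names and numbers here are only illustrative):
    #   {"header" : {
    #      "cloc_url"        : "github.com/AlDanial/cloc",
    #      "cloc_version"    : "2.01",
    #      "elapsed_seconds" : 0.05,
    #      "n_files"         : 2,
    #      "n_lines"         : 300},
    #   "hello.c" : {
    #      "blank"    : 5,
    #      "comment"  : 10,
    #      "code"     : 120,
    #      "language" : "C"},
    #   "SUM" : { ... }}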
|
|
my ($json_string, ) = @_;
|
|
print "-> load_json()\n" if $opt_v > 2;
|
|
|
|
my %contents = ();
|
|
my $heading = undef;
|
|
foreach (split /\n/, $json_string) {
|
|
if (/^{?"(.*?)"/) {
|
|
$heading = $1;
|
|
} else {
|
|
if (/^\s+"(.*?)"\s*:\s+(\d+(\.\d+)?)\b/) {
|
|
# numeric value
|
|
$contents{$heading}{$1} = $2;
|
|
} elsif (/^\s+"(.*?)"\s*:\s+"(.*?)"/) {
|
|
$contents{$heading}{$1} = $2;
|
|
}
|
|
}
|
|
}
|
|
my $url = $contents{'header'}{'cloc_url'};
|
|
my $ver = $contents{'header'}{'cloc_version'};
|
|
my $sec = $contents{'header'}{'elapsed_seconds'};
|
|
my $n_file = $contents{'header'}{'n_files'};
|
|
my $n_line = $contents{'header'}{'n_lines'};
|
|
$sec = $sec == 0 ? 1.0e-3 : $sec;
|
|
my $header = sprintf "%s v %s T=%.2f s (%.1f files/s, %.1f lines/s)",
|
|
$url, $ver, $sec, $n_file/$sec, $n_line/$sec;
|
|
delete $contents{'header'};
|
|
delete $contents{'SUM'};
|
|
|
|
my @file_list = (sort { $contents{$b}{'code'} <=>
|
|
$contents{$a}{'code'} } keys %contents );
|
|
#die Dumper(\%contents);
|
|
# Determine column widths for output
|
|
my $file_len = 0;
|
|
my $lang_len = 0;
|
|
foreach my $file (keys %contents) {
|
|
my $flen = length $file;
|
|
my $llen = length $contents{$file}{'language'};
|
|
$file_len = $file_len > $flen ? $file_len : $flen;
|
|
$lang_len = $lang_len > $llen ? $lang_len : $llen;
|
|
}
|
|
print "<- load_json()\n" if $opt_v > 2;
|
|
return $file_len, $lang_len, $header, %contents;
|
|
}
|
|
# 1}}}
|
|
sub print_format_n { # {{{1
|
|
# by file with
|
|
# format 1 : Language | files | blank | comment | code
|
|
# format 2 : Language | files | blank | comment | code | total
|
|
# format 3 : File | Language | blank | comment | code
|
|
# format 4 : File | blank | comment | code | total
|
|
# format 5 : File | Language | blank | comment | code | total
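    # For instance, format 1 yields one row per language under a
    # "Language  files  blank  comment  code" heading, e.g. (invented numbers)
    #     Perl           3      120       85      940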
|
|
my ($format, $file_len, $lang_len, $header, %contents) = @_;
|
|
print "-> print_format_n($format)\n" if $opt_v > 2;
|
|
my @prt_lines = ();
|
|
|
|
# 8 = characters in "Language"
|
|
$lang_len = max(8, $lang_len);
|
|
my %str_fmt = (
|
|
1 => sprintf("%%-%ds %%7s %%7s %%7s %%7s\n", $lang_len),
|
|
2 => sprintf("%%-%ds %%7s %%7s %%7s %%7s %%7s\n", $lang_len),
|
|
3 => sprintf("%%-%ds %%-%ds %%7s %%7s %%7s\n", $file_len, $lang_len),
|
|
4 => sprintf("%%-%ds %%7s %%7s %%7s %%7s\n", $file_len),
|
|
5 => sprintf("%%-%ds %%-%ds %%7s %%7s %%7s %%7s\n", $file_len, $lang_len),
|
|
);
|
|
my %val_fmt = (
|
|
1 => sprintf("%%-%ds %%7d %%7d %%7d %%7d\n", $lang_len),
|
|
2 => sprintf("%%-%ds %%7d %%7d %%7d %%7d %%7d\n", $lang_len),
|
|
3 => sprintf("%%-%ds %%-%ds %%7d %%7d %%7d\n", $file_len, $lang_len),
|
|
4 => sprintf("%%-%ds %%7d %%7d %%7d %%7d\n", $file_len),
|
|
5 => sprintf("%%-%ds %%-%ds %%7d %%7d %%7d %%7d\n", $file_len, $lang_len),
|
|
);
|
|
my %language = ();
|
|
foreach my $file (keys %contents) {
|
|
my $lang = $contents{$file}{'language'};
|
|
$language{$lang}{'files'} += 1;
|
|
foreach my $category ('blank', 'comment', 'code',) {
|
|
$language{$lang}{$category} += $contents{$file}{$category};
|
|
$language{$lang}{'total'} += $contents{$file}{$category};
|
|
}
|
|
}
|
|
my @file_list = (sort { $contents{$b}{'code'} <=>
|
|
$contents{$a}{'code'} } keys %contents );
|
|
my @lang_list = (sort { $language{$b}{'code'} <=>
|
|
$language{$a}{'code'} } keys %language );
|
|
|
|
my %hyphens = (
|
|
1 => "-" x ($lang_len + 4*9),
|
|
2 => "-" x ($lang_len + 5*9),
|
|
3 => "-" x ($lang_len + $file_len + 2 + 3*9),
|
|
4 => "-" x ($file_len + 4*9),
|
|
5 => "-" x ($lang_len + $file_len + 2 + 4*9),
|
|
);
|
|
my %col_headings = (
|
|
1 => ["Language", "files", "blank", "comment", "code"],
|
|
2 => ["Language", "files", "blank", "comment", "code", "Total"],
|
|
3 => ["File", "Language", "blank", "comment", "code"],
|
|
4 => ["File", "blank", "comment", "code", "Total"],
|
|
5 => ["File", "Language", "blank", "comment", "code", "Total"],
|
|
);
|
|
|
|
push @prt_lines, "$header\n";
|
|
push @prt_lines, "$hyphens{$format}\n";
|
|
push @prt_lines, sprintf $str_fmt{$format}, @{$col_headings{$format}};
|
|
push @prt_lines, "$hyphens{$format}\n";
|
|
my ($n_files, $n_blank, $n_comment, $n_code, $n_total) = (0, 0, 0, 0, 0);
|
|
my @out;
|
|
if ($format < 3) {
|
|
# by language
|
|
foreach my $lang (@lang_list) {
|
|
my ($nF, $nB, $nCm, $nCo) = ($language{$lang}{'files'},
|
|
$language{$lang}{'blank'},
|
|
$language{$lang}{'comment'},
|
|
$language{$lang}{'code'});
|
|
if ($format == 1) {
|
|
@out = ($lang, $nF, $nB, $nCm, $nCo);
|
|
} else {
|
|
@out = ($lang, $nF, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
|
|
}
|
|
push @prt_lines, sprintf $val_fmt{$format}, @out;
|
|
$n_files += $nF;
|
|
$n_blank += $nB;
|
|
$n_comment += $nCm;
|
|
$n_code += $nCo;
|
|
$n_total += $nB + $nCm + $nCo;
|
|
}
|
|
} else {
|
|
# by file
|
|
foreach my $file (@file_list) {
|
|
my ($nB, $nCm, $nCo) = ($contents{$file}{'blank'},
|
|
$contents{$file}{'comment'},
|
|
$contents{$file}{'code'});
|
|
my $lang = $contents{$file}{'language'};
|
|
            if ($format == 3) {
|
|
@out = ($file, $lang, $nB, $nCm, $nCo);
|
|
} elsif ($format == 4) {
|
|
@out = ($file, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
|
|
} else {
|
|
@out = ($file, $lang, $nB, $nCm, $nCo, $nB + $nCm + $nCo);
|
|
}
|
|
push @prt_lines, sprintf $val_fmt{$format}, @out;
|
|
$n_blank += $nB;
|
|
$n_comment += $nCm;
|
|
$n_code += $nCo;
|
|
$n_total += $nB + $nCm + $nCo;
|
|
}
|
|
}
|
|
push @prt_lines, "$hyphens{$format}\n";
|
|
if (scalar @file_list > 1) {
|
|
if ($format == 1) {
|
|
@out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code );
|
|
} elsif ($format == 2) {
|
|
@out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code, $n_total );
|
|
} elsif ($format == 3) {
|
|
@out = ( "SUM", " ", $n_blank, $n_comment, $n_code );
|
|
} elsif ($format == 4) {
|
|
@out = ( "SUM", $n_blank, $n_comment, $n_code, $n_total );
|
|
} else {
|
|
@out = ( "SUM", " ", $n_blank, $n_comment, $n_code, $n_total );
|
|
}
|
|
push @prt_lines, sprintf $val_fmt{$format}, @out;
|
|
push @prt_lines, "$hyphens{$format}\n";
|
|
}
|
|
    print "<- print_format_n()\n" if $opt_v > 2;
    return @prt_lines;
|
|
} # 1}}}
|
|
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
|
|
my %php_files = (); # really_is_php()
|
|
sub really_is_pascal { # {{{1
|
|
# Given filename, returns TRUE if its contents really are Pascal.
|
|
|
|
# This isn't as obvious as it seems.
|
|
# Many ".p" files are Perl files
|
|
# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p),
|
|
# others are C extractions
|
|
# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p
|
|
# and some files in linuxconf).
|
|
# However, test files in "p2c" really are Pascal, for example.
|
|
|
|
# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p
|
|
# is actually C code. The heuristics determine that they're not Pascal,
|
|
# but because it ends in ".p" it's not counted as C code either.
|
|
# I believe this is actually correct behavior, because frankly it
|
|
# looks like it's automatically generated (it's a bitmap expressed as code).
|
|
# Rather than guess otherwise, we don't include it in a list of
|
|
# source files. Let's face it, someone who creates C files ending in ".p"
|
|
# and expects them to be counted by default as C files in SLOCCount needs
|
|
# their head examined. I suggest examining their head
|
|
# with a sucker rod (see syslogd(8) for more on sucker rods).
|
|
|
|
    # This heuristic counts as Pascal such files as:
|
|
# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p
|
|
# Which is hand-generated. We don't count woven documents now anyway,
|
|
# so this is justifiable.
|
|
|
|
my $filename = shift;
|
|
chomp($filename);
|
|
|
|
# The heuristic is as follows: it's Pascal _IF_ it has all of the following
|
|
# (ignoring {...} and (*...*) comments):
|
|
# 1. "^..program NAME" or "^..unit NAME",
|
|
# 2. "procedure", "function", "^..interface", or "^..implementation",
|
|
# 3. a "begin", and
|
|
# 4. it ends with "end.",
|
|
#
|
|
# Or it has all of the following:
|
|
# 1. "^..module NAME" and
|
|
# 2. it ends with "end.".
|
|
#
|
|
# Or it has all of the following:
|
|
# 1. "^..program NAME",
|
|
# 2. a "begin", and
|
|
# 3. it ends with "end.".
|
|
#
|
|
# The "end." requirements in particular filter out non-Pascal.
|
|
#
|
|
# Note (jgb): this does not detect Pascal main files in fpc, like
|
|
# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in
|
|
# it
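    # A minimal hypothetical file that satisfies the first clause of the
    # heuristic ("program" + "procedure" + "begin" + a line ending in "end."):
    #     program hello;
    #     procedure greet; begin writeln('hi') end;
    #     begin greet end.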
|
|
|
|
my $is_pascal = 0; # Value to determine.
|
|
|
|
my $has_program = 0;
|
|
my $has_unit = 0;
|
|
my $has_module = 0;
|
|
my $has_procedure_or_function = 0;
|
|
my $found_begin = 0;
|
|
my $found_terminating_end = 0;
|
|
my $has_begin = 0;
|
|
|
|
my $PASCAL_FILE = open_file('<', $filename, 0);
|
|
die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
|
|
while(<$PASCAL_FILE>) {
|
|
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;}
|
|
if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;}
|
|
if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;}
|
|
if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; }
|
|
if (m/\bfunction\b/i) { $has_procedure_or_function = 1; }
|
|
if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; }
|
|
if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; }
|
|
if (m/\bbegin\b/i) { $has_begin = 1; }
|
|
# Originally I said:
|
|
# "This heuristic fails if there are multi-line comments after
|
|
# "end."; I haven't seen that in real Pascal programs:"
|
|
# But jgb found there are a good quantity of them in Debian, specially in
|
|
# fpc (at the end of a lot of files there is a multiline comment
|
|
# with the changelog for the file).
|
|
# Therefore, assume Pascal if "end." appears anywhere in the file.
|
|
if (m/end\.\s*$/i) {$found_terminating_end = 1;}
|
|
# elsif (m/\S/) {$found_terminating_end = 0;}
|
|
}
|
|
close($PASCAL_FILE);
|
|
|
|
# Okay, we've examined the entire file looking for clues;
|
|
# let's use those clues to determine if it's really Pascal:
|
|
|
|
if ( ( ($has_unit || $has_program) && $has_procedure_or_function &&
|
|
$has_begin && $found_terminating_end ) ||
|
|
( $has_module && $found_terminating_end ) ||
|
|
( $has_program && $has_begin && $found_terminating_end ) )
|
|
{$is_pascal = 1;}
|
|
|
|
return $is_pascal;
|
|
} # 1}}}
|
|
sub really_is_incpascal { # {{{1
|
|
# Given filename, returns TRUE if its contents really are Pascal.
|
|
# For .inc files (mainly seen in fpc)
|
|
|
|
my $filename = shift;
|
|
chomp($filename);
|
|
|
|
# The heuristic is as follows: it is Pascal if any of the following:
|
|
# 1. really_is_pascal returns true
|
|
# 2. Any usual reserved word is found (program, unit, const, begin...)
|
|
|
|
# If the general routine for Pascal files works, we have it
|
|
if (really_is_pascal($filename)) {
|
|
return 1;
|
|
}
|
|
|
|
my $is_pascal = 0; # Value to determine.
|
|
my $found_begin = 0;
|
|
|
|
my $PASCAL_FILE = open_file('<', $filename, 0);
|
|
die "Can't open $filename to determine if it's pascal.\n" if !defined $PASCAL_FILE;
|
|
while(<$PASCAL_FILE>) {
|
|
s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective.
|
|
s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective.
|
|
if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;}
|
|
if (m/\bprocedure\b/i) {$is_pascal = 1; }
|
|
if (m/\bfunction\b/i) {$is_pascal = 1; }
|
|
if (m/^\s*interface\s+/i) {$is_pascal = 1; }
|
|
if (m/^\s*implementation\s+/i) {$is_pascal = 1; }
|
|
if (m/\bconstant\s+/i) {$is_pascal=1;}
|
|
if (m/\bbegin\b/i) { $found_begin = 1; }
|
|
    if ((m/end\.\s*$/i) && ($found_begin == 1)) {$is_pascal = 1;}
|
|
if ($is_pascal) {
|
|
last;
|
|
}
|
|
}
|
|
|
|
close($PASCAL_FILE);
|
|
return $is_pascal;
|
|
} # 1}}}
|
|
sub really_is_php { # {{{1
|
|
    # Given filename, returns TRUE if its contents really are PHP.
|
|
|
|
my $filename = shift;
|
|
chomp($filename);
|
|
|
|
my $is_php = 0; # Value to determine.
|
|
# Need to find a matching pair of surrounds, with ending after beginning:
|
|
my $normal_surround = 0; # <?; bit 0 = <?, bit 1 = ?>
|
|
my $script_surround = 0; # <script..>; bit 0 = <script language="php">
|
|
my $asp_surround = 0; # <%; bit 0 = <%, bit 1 = %>
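    # e.g. a hypothetical file containing "<?php echo 1; ?>" drives
    # $normal_surround from 0 to 1 to 3, so the file is counted as PHP.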
|
|
|
|
# Return cached result, if available:
|
|
if ($php_files{$filename}) { return $php_files{$filename};}
|
|
|
|
my $PHP_FILE = open_file('<', $filename, 0);
|
|
die "Can't open $filename to determine if it's php.\n" if !defined $PHP_FILE;
|
|
while(<$PHP_FILE>) {
|
|
if (m/\<\?/) { $normal_surround |= 1; }
|
|
if (m/\?\>/ && ($normal_surround & 1)) { $normal_surround |= 2; }
|
|
if (m/\<script.*language="?php"?/i) { $script_surround |= 1; }
|
|
if (m/\<\/script\>/i && ($script_surround & 1)) { $script_surround |= 2; }
|
|
if (m/\<\%/) { $asp_surround |= 1; }
|
|
if (m/\%\>/ && ($asp_surround & 1)) { $asp_surround |= 2; }
|
|
}
|
|
close($PHP_FILE);
|
|
|
|
if ( ($normal_surround == 3) || ($script_surround == 3) ||
|
|
($asp_surround == 3)) {
|
|
$is_php = 1;
|
|
}
|
|
|
|
$php_files{$filename} = $is_php; # Store result in cache.
|
|
|
|
return $is_php;
|
|
} # 1}}}
|
|
# vendored modules
|
|
sub Install_Regexp_Common { # {{{1
|
|
# Installs portions of Damian Conway's & Abigail's Regexp::Common
|
|
# module, version 2017060201 into a temporary directory for the
|
|
# duration of this run.
|
|
my %Regexp_Common_Contents = ();
|
|
$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2
|
|
package Regexp::Common;
|
|
|
|
use 5.10.0;
|
|
use strict;
|
|
|
|
use warnings;
|
|
no warnings 'syntax';
|
|
|
|
our $VERSION = '2017060201';
|
|
our %RE;
|
|
our %sub_interface;
|
|
our $AUTOLOAD;
|
|
|
|
|
|
sub _croak {
|
|
require Carp;
|
|
goto &Carp::croak;
|
|
}
|
|
|
|
sub _carp {
|
|
require Carp;
|
|
goto &Carp::carp;
|
|
}
|
|
|
|
sub new {
|
|
my ($class, @data) = @_;
|
|
my %self;
|
|
tie %self, $class, @data;
|
|
return \%self;
|
|
}
|
|
|
|
sub TIEHASH {
|
|
my ($class, @data) = @_;
|
|
bless \@data, $class;
|
|
}
|
|
|
|
sub FETCH {
|
|
my ($self, $extra) = @_;
|
|
return bless ref($self)->new(@$self, $extra), ref($self);
|
|
}
|
|
|
|
my %imports = map {$_ => "Regexp::Common::$_"}
|
|
qw /balanced CC comment delimited lingua list
|
|
net number profanity SEN URI whitespace
|
|
zip/;
|
|
|
|
sub import {
|
|
shift; # Shift off the class.
|
|
tie %RE, __PACKAGE__;
|
|
{
|
|
no strict 'refs';
|
|
*{caller() . "::RE"} = \%RE;
|
|
}
|
|
|
|
my $saw_import;
|
|
my $no_defaults;
|
|
my %exclude;
|
|
foreach my $entry (grep {!/^RE_/} @_) {
|
|
if ($entry eq 'pattern') {
|
|
no strict 'refs';
|
|
*{caller() . "::pattern"} = \&pattern;
|
|
next;
|
|
}
|
|
# This used to prevent $; from being set. We still recognize it,
|
|
# but we won't do anything.
|
|
if ($entry eq 'clean') {
|
|
next;
|
|
}
|
|
if ($entry eq 'no_defaults') {
|
|
$no_defaults ++;
|
|
next;
|
|
}
|
|
if (my $module = $imports {$entry}) {
|
|
$saw_import ++;
|
|
eval "require $module;";
|
|
die $@ if $@;
|
|
next;
|
|
}
|
|
if ($entry =~ /^!(.*)/ && $imports {$1}) {
|
|
$exclude {$1} ++;
|
|
next;
|
|
}
|
|
# As a last resort, try to load the argument.
|
|
my $module = $entry =~ /^Regexp::Common/
|
|
? $entry
|
|
: "Regexp::Common::" . $entry;
|
|
eval "require $module;";
|
|
die $@ if $@;
|
|
}
|
|
|
|
unless ($saw_import || $no_defaults) {
|
|
foreach my $module (values %imports) {
|
|
next if $exclude {$module};
|
|
eval "require $module;";
|
|
die $@ if $@;
|
|
}
|
|
}
|
|
|
|
my %exported;
|
|
foreach my $entry (grep {/^RE_/} @_) {
|
|
if ($entry =~ /^RE_(\w+_)?ALL$/) {
|
|
my $m = defined $1 ? $1 : "";
|
|
my $re = qr /^RE_${m}.*$/;
|
|
while (my ($sub, $interface) = each %sub_interface) {
|
|
next if $exported {$sub};
|
|
next unless $sub =~ /$re/;
|
|
{
|
|
no strict 'refs';
|
|
*{caller() . "::$sub"} = $interface;
|
|
}
|
|
$exported {$sub} ++;
|
|
}
|
|
}
|
|
else {
|
|
next if $exported {$entry};
|
|
_croak "Can't export unknown subroutine &$entry"
|
|
unless $sub_interface {$entry};
|
|
{
|
|
no strict 'refs';
|
|
*{caller() . "::$entry"} = $sub_interface {$entry};
|
|
}
|
|
$exported {$entry} ++;
|
|
}
|
|
}
|
|
}
|
|
|
|
sub AUTOLOAD { _croak "Can't $AUTOLOAD" }
|
|
|
|
sub DESTROY {}
|
|
|
|
my %cache;
|
|
|
|
my $fpat = qr/^(-\w+)/;
|
|
|
|
sub _decache {
|
|
my @args = @{tied %{$_[0]}};
|
|
my @nonflags = grep {!/$fpat/} @args;
|
|
my $cache = get_cache(@nonflags);
|
|
_croak "Can't create unknown regex: \$RE{"
|
|
. join("}{",@args) . "}"
|
|
unless exists $cache->{__VAL__};
|
|
_croak "Perl $] does not support the pattern "
|
|
. "\$RE{" . join("}{",@args)
|
|
. "}.\nYou need Perl $cache->{__VAL__}{version} or later"
|
|
unless ($cache->{__VAL__}{version}||0) <= $];
|
|
my %flags = ( %{$cache->{__VAL__}{default}},
|
|
map { /$fpat\Q$;\E(.*)/ ? ($1 => $2)
|
|
: /$fpat/ ? ($1 => undef)
|
|
: ()
|
|
} @args);
|
|
$cache->{__VAL__}->_clone_with(\@args, \%flags);
|
|
}
|
|
|
|
use overload q{""} => \&_decache;
|
|
|
|
|
|
sub get_cache {
|
|
my $cache = \%cache;
|
|
foreach (@_) {
|
|
$cache = $cache->{$_}
|
|
|| ($cache->{$_} = {});
|
|
}
|
|
return $cache;
|
|
}
|
|
|
|
sub croak_version {
|
|
my ($entry, @args) = @_;
|
|
}
|
|
|
|
sub pattern {
|
|
my %spec = @_;
|
|
_croak 'pattern() requires argument: name => [ @list ]'
|
|
unless $spec{name} && ref $spec{name} eq 'ARRAY';
|
|
_croak 'pattern() requires argument: create => $sub_ref_or_string'
|
|
unless $spec{create};
|
|
|
|
if (ref $spec{create} ne "CODE") {
|
|
my $fixed_str = "$spec{create}";
|
|
$spec{create} = sub { $fixed_str }
|
|
}
|
|
|
|
my @nonflags;
|
|
my %default;
|
|
foreach ( @{$spec{name}} ) {
|
|
if (/$fpat=(.*)/) {
|
|
$default{$1} = $2;
|
|
}
|
|
elsif (/$fpat\s*$/) {
|
|
$default{$1} = undef;
|
|
}
|
|
else {
|
|
push @nonflags, $_;
|
|
}
|
|
}
|
|
|
|
my $entry = get_cache(@nonflags);
|
|
|
|
if ($entry->{__VAL__}) {
|
|
_carp "Overriding \$RE{"
|
|
. join("}{",@nonflags)
|
|
. "}";
|
|
}
|
|
|
|
$entry->{__VAL__} = bless {
|
|
create => $spec{create},
|
|
match => $spec{match} || \&generic_match,
|
|
subs => $spec{subs} || \&generic_subs,
|
|
version => $spec{version},
|
|
default => \%default,
|
|
}, 'Regexp::Common::Entry';
|
|
|
|
foreach (@nonflags) {s/\W/X/g}
|
|
my $subname = "RE_" . join ("_", @nonflags);
|
|
$sub_interface{$subname} = sub {
|
|
push @_ => undef if @_ % 2;
|
|
my %flags = @_;
|
|
my $pat = $spec{create}->($entry->{__VAL__},
|
|
{%default, %flags}, \@nonflags);
|
|
if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; }
|
|
else { $pat =~ s/\Q(?k:/(?:/g; }
|
|
return exists $flags {-i} ? qr /(?i:$pat)/ : qr/$pat/;
|
|
};
|
|
|
|
return 1;
|
|
}
|
|
|
|
sub generic_match {$_ [1] =~ /$_[0]/}
|
|
sub generic_subs {$_ [1] =~ s/$_[0]/$_[2]/}
|
|
|
|
sub matches {
|
|
my ($self, $str) = @_;
|
|
my $entry = $self -> _decache;
|
|
$entry -> {match} -> ($entry, $str);
|
|
}
|
|
|
|
sub subs {
|
|
my ($self, $str, $newstr) = @_;
|
|
my $entry = $self -> _decache;
|
|
$entry -> {subs} -> ($entry, $str, $newstr);
|
|
return $str;
|
|
}
|
|
|
|
|
|
package Regexp::Common::Entry;
|
|
# use Carp;
|
|
|
|
use overload
|
|
q{""} => sub {
|
|
my ($self) = @_;
|
|
my $pat = $self->{create}->($self, $self->{flags}, $self->{args});
|
|
if (exists $self->{flags}{-keep}) {
|
|
$pat =~ s/\Q(?k:/(/g;
|
|
}
|
|
else {
|
|
$pat =~ s/\Q(?k:/(?:/g;
|
|
}
|
|
if (exists $self->{flags}{-i}) { $pat = "(?i)$pat" }
|
|
return $pat;
|
|
};
|
|
|
|
sub _clone_with {
|
|
my ($self, $args, $flags) = @_;
|
|
bless { %$self, args=>$args, flags=>$flags }, ref $self;
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
Regexp::Common - Provide commonly requested regular expressions
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
# STANDARD USAGE
|
|
|
|
use Regexp::Common;
|
|
|
|
while (<>) {
|
|
/$RE{num}{real}/ and print q{a number};
|
|
/$RE{quoted}/ and print q{a ['"`] quoted string};
|
|
m[$RE{delimited}{-delim=>'/'}] and print q{a /.../ sequence};
|
|
/$RE{balanced}{-parens=>'()'}/ and print q{balanced parentheses};
|
|
/$RE{profanity}/ and print q{a #*@%-ing word};
|
|
}
|
|
|
|
|
|
# SUBROUTINE-BASED INTERFACE
|
|
|
|
use Regexp::Common 'RE_ALL';
|
|
|
|
while (<>) {
|
|
$_ =~ RE_num_real() and print q{a number};
|
|
$_ =~ RE_quoted() and print q{a ['"`] quoted string};
|
|
$_ =~ RE_delimited(-delim=>'/') and print q{a /.../ sequence};
|
|
        $_ =~ RE_balanced(-parens=>'()') and print q{balanced parentheses};
|
|
$_ =~ RE_profanity() and print q{a #*@%-ing word};
|
|
}
|
|
|
|
|
|
# IN-LINE MATCHING...
|
|
|
|
if ( $RE{num}{int}->matches($text) ) {...}
|
|
|
|
|
|
# ...AND SUBSTITUTION
|
|
|
|
my $cropped = $RE{ws}{crop}->subs($uncropped);
|
|
|
|
|
|
# ROLL-YOUR-OWN PATTERNS
|
|
|
|
use Regexp::Common 'pattern';
|
|
|
|
pattern name => ['name', 'mine'],
|
|
create => '(?i:J[.]?\s+A[.]?\s+Perl-Hacker)',
|
|
;
|
|
|
|
my $name_matcher = $RE{name}{mine};
|
|
|
|
pattern name => [ 'lineof', '-char=_' ],
|
|
create => sub {
|
|
my $flags = shift;
|
|
my $char = quotemeta $flags->{-char};
|
|
return '(?:^$char+$)';
|
|
},
|
|
match => sub {
|
|
my ($self, $str) = @_;
|
|
return $str !~ /[^$self->{flags}{-char}]/;
|
|
},
|
|
subs => sub {
|
|
my ($self, $str, $replacement) = @_;
|
|
$_[1] =~ s/^$self->{flags}{-char}+$//g;
|
|
},
|
|
;
|
|
|
|
my $asterisks = $RE{lineof}{-char=>'*'};
|
|
|
|
# DECIDING WHICH PATTERNS TO LOAD.
|
|
|
|
use Regexp::Common qw /comment number/; # Comment and number patterns.
|
|
use Regexp::Common qw /no_defaults/; # Don't load any patterns.
|
|
use Regexp::Common qw /!delimited/; # All, but delimited patterns.
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
By default, this module exports a single hash (C<%RE>) that stores or generates
|
|
commonly needed regular expressions (see L<"List of available patterns">).
|
|
|
|
There is an alternative, subroutine-based syntax described in
|
|
L<"Subroutine-based interface">.
|
|
|
|
|
|
=head2 General syntax for requesting patterns
|
|
|
|
To access a particular pattern, C<%RE> is treated as a hierarchical hash of
|
|
hashes (of hashes...), with each successive key being an identifier. For
|
|
example, to access the pattern that matches real numbers, you
|
|
specify:
|
|
|
|
$RE{num}{real}
|
|
|
|
and to access the pattern that matches integers:
|
|
|
|
$RE{num}{int}
|
|
|
|
Deeper layers of the hash are used to specify I<flags>: arguments that
|
|
modify the resulting pattern in some way. The keys used to access these
|
|
layers are prefixed with a minus sign and may have a value; if a value
|
|
is given, it's done by using a multidimensional key.
|
|
For example, to access the pattern that
|
|
matches base-2 real numbers with embedded commas separating
|
|
groups of three digits (e.g. 10,101,110.110101101):
|
|
|
|
$RE{num}{real}{-base => 2}{-sep => ','}{-group => 3}
|
|
|
|
Through the magic of Perl, these flag layers may be specified in any order
|
|
(and even interspersed through the identifier keys!)
|
|
so you could get the same pattern with:
|
|
|
|
$RE{num}{real}{-sep => ','}{-group => 3}{-base => 2}
|
|
|
|
or:
|
|
|
|
$RE{num}{-base => 2}{real}{-group => 3}{-sep => ','}
|
|
|
|
or even:
|
|
|
|
$RE{-base => 2}{-group => 3}{-sep => ','}{num}{real}
|
|
|
|
etc.
|
|
|
|
Note, however, that the relative order amongst the identifier keys
|
|
I<is> significant. That is:
|
|
|
|
$RE{list}{set}
|
|
|
|
would not be the same as:
|
|
|
|
$RE{set}{list}
|
|
|
|
=head2 Flag syntax
|
|
|
|
In versions prior to 2.113, flags could also be written as
|
|
C<{"-flag=value"}>. This no longer works, although C<{"-flag$;value"}>
|
|
still does. However, C<< {-flag => 'value'} >> is the preferred syntax.
|
|
|
|
=head2 Universal flags
|
|
|
|
Normally, flags are specific to a single pattern.
|
|
However, there are two flags that all patterns may specify.
|
|
|
|
=over 4
|
|
|
|
=item C<-keep>
|
|
|
|
By default, the patterns provided by C<%RE> contain no capturing
|
|
parentheses. However, if the C<-keep> flag is specified (it requires
|
|
no value) then any significant substrings that the pattern matches
|
|
are captured. For example:
|
|
|
|
if ($str =~ $RE{num}{real}{-keep}) {
|
|
$number = $1;
|
|
$whole = $3;
|
|
$decimals = $5;
|
|
}
|
|
|
|
Special care is needed if a "kept" pattern is interpolated into a
|
|
larger regular expression, as the presence of other capturing
|
|
parentheses is likely to change the "number variables" into which significant
|
|
substrings are saved.
|
|
|
|
See also L<"Adding new regular expressions">, which describes how to create
|
|
new patterns with "optional" capturing brackets that respond to C<-keep>.
|
|
|
|
=item C<-i>
|
|
|
|
Some patterns or subpatterns only match lowercase or uppercase letters.
|
|
If one wants to do case insensitive matching, one option is to use
|
|
the C</i> regexp modifier, or the special sequence C<(?i)>. But if the
|
|
functional interface is used, one does not have this option. The
|
|
C<-i> switch solves this problem; by using it, the pattern will do
|
|
case insensitive matching.
|
|
|
|
=back
|
|
|
|
=head2 OO interface and inline matching/substitution
|
|
|
|
The patterns returned from C<%RE> are objects, so rather than writing:
|
|
|
|
if ($str =~ /$RE{some}{pattern}/ ) {...}
|
|
|
|
you can write:
|
|
|
|
if ( $RE{some}{pattern}->matches($str) ) {...}
|
|
|
|
For matching this would seem to have no great advantage apart from readability
|
|
(but see below).
|
|
|
|
For substitutions, it has other significant benefits. Frequently you want to
|
|
perform a substitution on a string without changing the original. Most people
|
|
use this:
|
|
|
|
$changed = $original;
|
|
$changed =~ s/$RE{some}{pattern}/$replacement/;
|
|
|
|
The more adept use:
|
|
|
|
($changed = $original) =~ s/$RE{some}{pattern}/$replacement/;
|
|
|
|
Regexp::Common allows you to write this:
|
|
|
|
$changed = $RE{some}{pattern}->subs($original=>$replacement);
|
|
|
|
Apart from reducing precedence-angst, this approach has the added
|
|
advantages that the substitution behaviour can be optimized from the
|
|
regular expression, and the replacement string can be provided by
|
|
default (see L<"Adding new regular expressions">).
|
|
|
|
For example, in the implementation of this substitution:
|
|
|
|
$cropped = $RE{ws}{crop}->subs($uncropped);
|
|
|
|
the default empty string is provided automatically, and the substitution is
|
|
optimized to use:
|
|
|
|
$uncropped =~ s/^\s+//;
|
|
$uncropped =~ s/\s+$//;
|
|
|
|
rather than:
|
|
|
|
$uncropped =~ s/^\s+|\s+$//g;
|
|
|
|
|
|
=head2 Subroutine-based interface
|
|
|
|
The hash-based interface was chosen because it allows regexes to be
|
|
effortlessly interpolated, and because it also allows them to be
|
|
"curried". For example:
|
|
|
|
my $num = $RE{num}{int};
|
|
|
|
my $command = $num->{-sep=>','}{-group=>3};
|
|
my $duodecimal = $num->{-base=>12};
|
|
|
|
|
|
However, the use of tied hashes does make the access to Regexp::Common
|
|
patterns slower than it might otherwise be. In contexts where impatience
|
|
overrules laziness, Regexp::Common provides an additional
|
|
subroutine-based interface.
|
|
|
|
For each (sub-)entry in the C<%RE> hash (C<$RE{key1}{key2}{etc}>), there
|
|
is a corresponding exportable subroutine: C<RE_key1_key2_etc()>. The name of
|
|
each subroutine is the underscore-separated concatenation of the I<non-flag>
|
|
keys that locate the same pattern in C<%RE>. Flags are passed to the subroutine
|
|
in its argument list. Thus:
|
|
|
|
use Regexp::Common qw( RE_ws_crop RE_num_real RE_profanity );
|
|
|
|
$str =~ RE_ws_crop() and die "Surrounded by whitespace";
|
|
|
|
$str =~ RE_num_real(-base=>8, -sep=>" ") or next;
|
|
|
|
$offensive = RE_profanity(-keep);
|
|
$str =~ s/$offensive/$bad{$1}++; "<expletive deleted>"/ge;
|
|
|
|
Note that, unlike the hash-based interface (which returns objects), these
|
|
subroutines return ordinary C<qr>'d regular expressions. Hence they do not
|
|
curry, nor do they provide the OO match and substitution inlining described
|
|
in the previous section.
|
|
|
|
It is also possible to export subroutines for all available patterns like so:
|
|
|
|
use Regexp::Common 'RE_ALL';
|
|
|
|
Or you can export all subroutines with a common prefix of keys like so:
|
|
|
|
use Regexp::Common 'RE_num_ALL';
|
|
|
|
which will export C<RE_num_int> and C<RE_num_real> (and if you have
|
|
created more patterns whose first key is I<num>, those will be exported
|
|
as well). In general, I<RE_key1_..._keyn_ALL> will export all subroutines
|
|
whose pattern names have first keys I<key1> ... I<keyn>.
|
|
|
|
|
|
=head2 Adding new regular expressions
|
|
|
|
You can add your own regular expressions to the C<%RE> hash at run-time,
|
|
using the exportable C<pattern> subroutine. It expects a hash-like list of
|
|
key/value pairs that specify the behaviour of the pattern. The various
|
|
possible argument pairs are:
|
|
|
|
=over 4
|
|
|
|
=item C<name =E<gt> [ @list ]>
|
|
|
|
A required argument that specifies the name of the pattern, and any
|
|
flags it may take, via a reference to a list of strings. For example:
|
|
|
|
pattern name => [qw( line of -char )],
|
|
# other args here
|
|
;
|
|
|
|
This specifies an entry C<$RE{line}{of}>, which may take a C<-char> flag.
|
|
|
|
Flags may also be specified with a default value, which is then used whenever
|
|
the flag is specified without an explicit value (but not when the flag is
|
|
omitted). For example:
|
|
|
|
pattern name => [qw( line of -char=_ )],
|
|
# default char is '_'
|
|
# other args here
|
|
;
|
|
|
|
|
|
=item C<create =E<gt> $sub_ref_or_string>
|
|
|
|
A required argument that specifies either a string that is to be returned
|
|
as the pattern:
|
|
|
|
pattern name => [qw( line of underscores )],
|
|
create => q/(?:^_+$)/
|
|
;
|
|
|
|
or a reference to a subroutine that will be called to create the pattern:
|
|
|
|
pattern name => [qw( line of -char=_ )],
|
|
create => sub {
|
|
my ($self, $flags) = @_;
|
|
my $char = quotemeta $flags->{-char};
|
|
return '(?:^$char+$)';
|
|
},
|
|
;
|
|
|
|
If the subroutine version is used, the subroutine will be called with
|
|
three arguments: a reference to the pattern object itself, a reference
|
|
to a hash containing the flags and their values,
|
|
and a reference to an array containing the non-flag keys.
|
|
|
|
Whatever the subroutine returns is stringified as the pattern.
|
|
|
|
No matter how the pattern is created, it is immediately postprocessed to
|
|
include or exclude capturing parentheses (according to the value of the
|
|
C<-keep> flag). To specify such "optional" capturing parentheses within
|
|
the regular expression associated with C<create>, use the notation
|
|
C<(?k:...)>. Any parentheses of this type will be converted to C<(...)>
|
|
when the C<-keep> flag is specified, or C<(?:...)> when it is not.
|
|
It is a Regexp::Common convention that the outermost capturing parentheses
|
|
always capture the entire pattern, but this is not enforced.
|
|
|
|
|
|
=item C<match =E<gt> $sub_ref>
|
|
|
|
An optional argument that specifies a subroutine that is to be called when
|
|
the C<$RE{...}-E<gt>matches(...)> method of this pattern is invoked.
|
|
|
|
The subroutine should expect two arguments: a reference to the pattern object
|
|
itself, and the string to be matched against.
|
|
|
|
It should return the same types of values as a C<m/.../> does.
|
|
|
|
pattern name => [qw( line of -char )],
|
|
create => sub {...},
|
|
match => sub {
|
|
my ($self, $str) = @_;
|
|
$str !~ /[^$self->{flags}{-char}]/;
|
|
},
|
|
;
|
|
|
|
|
|
=item C<subs =E<gt> $sub_ref>
|
|
|
|
An optional argument that specifies a subroutine that is to be called when
|
|
the C<$RE{...}-E<gt>subs(...)> method of this pattern is invoked.
|
|
|
|
The subroutine should expect three arguments: a reference to the pattern object
|
|
itself, the string to be changed, and the value to be substituted into it.
|
|
The third argument may be C<undef>, indicating the default substitution is
|
|
required.
|
|
|
|
The subroutine should return the same types of values as an C<s/.../.../> does.
|
|
|
|
For example:
|
|
|
|
pattern name => [ 'lineof', '-char=_' ],
|
|
create => sub {...},
|
|
subs => sub {
|
|
my ($self, $str, $ignore_replacement) = @_;
|
|
$_[1] =~ s/^$self->{flags}{-char}+$//g;
|
|
},
|
|
;
|
|
|
|
Note that such a subroutine will almost always need to modify C<$_[1]> directly.
|
|
|
|
|
|
=item C<version =E<gt> $minimum_perl_version>
|
|
|
|
If this argument is given, it specifies the minimum version of perl required
|
|
to use the new pattern. Attempts to use the pattern with earlier versions of
|
|
perl will generate a fatal diagnostic.
|
|
|
|
=back
|
|
|
|
=head2 Loading specific sets of patterns.
|
|
|
|
By default, all the sets of patterns listed below are made available.
|
|
However, it is possible to indicate which sets of patterns should
|
|
be made available - the wanted sets should be given as arguments to
|
|
C<use>. Alternatively, it is also possible to indicate which sets of
|
|
patterns should not be made available - those sets will be given as
|
|
arguments to the C<use> statement, but are preceded with an exclamation
|
|
mark. The argument I<no_defaults> indicates none of the default patterns
|
|
should be made available. This is useful for instance if all you want
|
|
is the C<pattern()> subroutine.
|
|
|
|
Examples:
|
|
|
|
use Regexp::Common qw /comment number/; # Comment and number patterns.
|
|
use Regexp::Common qw /no_defaults/; # Don't load any patterns.
|
|
use Regexp::Common qw /!delimited/; # All, but delimited patterns.
|
|
|
|
It's also possible to load your own set of patterns. If you have a
|
|
module C<Regexp::Common::my_patterns> that makes patterns available,
|
|
you can have it made available with
|
|
|
|
use Regexp::Common qw /my_patterns/;
|
|
|
|
Note that the default patterns will still be made available; only if you
use I<no_defaults>, or mention one of the default sets explicitly, are the
unmentioned defaults left out.
|
|
|
|
=head2 List of available patterns
|
|
|
|
The patterns listed below are currently available. Each set of patterns
|
|
has its own manual page describing the details. For each pattern set
|
|
named I<name>, the manual page I<Regexp::Common::name> describes the
|
|
details.
|
|
|
|
Currently available are:
|
|
|
|
=over 4
|
|
|
|
=item Regexp::Common::balanced
|
|
|
|
Provides regexes for strings with balanced parenthesized delimiters.
|
|
|
|
=item Regexp::Common::comment
|
|
|
|
Provides regexes for comments of various languages (43 languages
|
|
currently).
|
|
|
|
=item Regexp::Common::delimited
|
|
|
|
Provides regexes for delimited strings.
|
|
|
|
=item Regexp::Common::lingua
|
|
|
|
Provides regexes for palindromes.
|
|
|
|
=item Regexp::Common::list
|
|
|
|
Provides regexes for lists.
|
|
|
|
=item Regexp::Common::net
|
|
|
|
Provides regexes for IPv4, IPv6, and MAC addresses.
|
|
|
|
=item Regexp::Common::number
|
|
|
|
Provides regexes for numbers (integers and reals).
|
|
|
|
=item Regexp::Common::profanity
|
|
|
|
Provides regexes for profanity.
|
|
|
|
=item Regexp::Common::whitespace
|
|
|
|
Provides regexes for leading and trailing whitespace.
|
|
|
|
=item Regexp::Common::zip
|
|
|
|
Provides regexes for zip codes.
|
|
|
|
=back
|
|
|
|
=head2 Forthcoming patterns and features
|
|
|
|
Future releases of the module will also provide patterns for the following:
|
|
|
|
* email addresses
|
|
* HTML/XML tags
|
|
* more numerical matchers,
|
|
* mail headers (including multiline ones),
|
|
* more URLS
|
|
* telephone numbers of various countries
|
|
* currency (universal 3 letter format, Latin-1, currency names)
|
|
* dates
|
|
* binary formats (e.g. UUencoded, MIMEd)
|
|
|
|
If you have other patterns or pattern generators that you think would be
|
|
generally useful, please send them to the maintainer -- preferably as source
|
|
code using the C<pattern> subroutine. Submissions that include a set of
|
|
tests will be especially welcome.
|
|
|
|
|
|
=head1 DIAGNOSTICS
|
|
|
|
=over 4
|
|
|
|
=item C<Can't export unknown subroutine %s>
|
|
|
|
The subroutine-based interface didn't recognize the requested subroutine.
|
|
Often caused by a spelling mistake or an incompletely specified name.
|
|
|
|
|
|
=item C<Can't create unknown regex: $RE{...}>
|
|
|
|
Regexp::Common doesn't have a generator for the requested pattern.
|
|
Often indicates a misspelt or missing parameter.
|
|
|
|
=item
|
|
C<Perl %f does not support the pattern $RE{...}.
|
|
You need Perl %f or later>
|
|
|
|
The requested pattern requires advanced regex features (e.g. recursion)
|
|
that are not available in your version of Perl. Time to upgrade.
|
|
|
|
=item C<< pattern() requires argument: name => [ @list ] >>
|
|
|
|
Every user-defined pattern specification must have a name.
|
|
|
|
=item C<< pattern() requires argument: create => $sub_ref_or_string >>
|
|
|
|
Every user-defined pattern specification must provide a pattern creation
|
|
mechanism: either a pattern string or a reference to a subroutine that
|
|
returns the pattern string.
|
|
|
|
=item C<Base must be between 1 and 36>
|
|
|
|
The C<< $RE{num}{real}{-base=>'I<N>'} >> pattern uses the characters [0-9A-Z]
|
|
to represent the digits of various bases. Hence it only produces
|
|
regular expressions for bases up to hexatricensimal.
|
|
|
|
=item C<Must specify delimiter in $RE{delimited}>
|
|
|
|
The pattern has no default delimiter.
|
|
You need to write: C<< $RE{delimited}{-delim=>'I<X>'} >> for some character I<X>.
|
|
|
|
=back
|
|
|
|
=head1 ACKNOWLEDGEMENTS
|
|
|
|
Deepest thanks to the many people who have encouraged and contributed to this
|
|
project, especially: Elijah, Jarkko, Tom, Nat, Ed, and Vivek.
|
|
|
|
Further thanks go to: Alexandr Ciornii, Blair Zajac, Bob Stockdale,
|
|
Charles Thomas, Chris Vertonghen, the CPAN Testers, David Hand,
|
|
Fany, Geoffrey Leach, Hermann-Marcus Behrens, Jerome Quelin, Jim Cromie,
|
|
Lars Wilke, Linda Julien, Mike Arms, Mike Castle, Mikko, Murat Uenalan,
|
|
RafaE<235>l Garcia-Suarez, Ron Savage, Sam Vilain, Slaven Rezic, Smylers,
|
|
Tim Maher, and all the others I've forgotten.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Damian Conway (damian@conway.org)
|
|
|
|
=head1 MAINTENANCE
|
|
|
|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
|
|
|
|
=head1 BUGS AND IRRITATIONS
|
|
|
|
Bound to be plenty.
|
|
|
|
For a start, there are many common regexes missing.
|
|
Send them in to I<regexp-common@abigail.be>.
|
|
|
|
There are some POD issues when installing this module using a pre-5.6.0 perl;
|
|
some manual pages may not install, or may not install correctly using a perl
|
|
that is that old. You might consider upgrading your perl.
|
|
|
|
=head1 NOT A BUG
|
|
|
|
=over 4
|
|
|
|
=item *
|
|
|
|
The various patterns are not anchored. That is, a pattern like
|
|
C<< $RE {num} {int} >> will match against "abc4def", because a
|
|
substring of the subject matches. This is by design, and not a
|
|
bug. If you want the pattern to be anchored, use something like:
|
|
|
|
my $integer = $RE {num} {int};
|
|
$subj =~ /^$integer$/ and print "Matches!\n";
|
|
|
|
=back
|
|
|
|
=head1 LICENSE and COPYRIGHT
|
|
|
|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
|
|
|
|
This module is free software, and may be used under any of the following
|
|
licenses:
|
|
|
|
1) The Perl Artistic License. See the file COPYRIGHT.AL.
|
|
2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
|
|
3) The BSD License. See the file COPYRIGHT.BSD.
|
|
4) The MIT License. See the file COPYRIGHT.MIT.
|
|
EOCommon
|
|
# 2}}}
|
|
$Regexp_Common_Contents{'Common/comment'} = <<'EOC'; # {{{2
|
|
package Regexp::Common::comment;
|
|
|
|
use 5.10.0;
|
|
|
|
use strict;
|
|
use warnings;
|
|
no warnings 'syntax';
|
|
|
|
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
|
our $VERSION = '2017060201';
|
|
|
|
my @generic = (
|
|
{languages => [qw /ABC Forth/],
|
|
to_eol => ['\\\\']}, # This is for just a *single* backslash.
|
|
|
|
{languages => [qw /Ada Alan Eiffel lua/],
|
|
to_eol => ['--']},
|
|
|
|
{languages => [qw /Advisor/],
|
|
to_eol => ['#|//']},
|
|
|
|
{languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme
|
|
SMITH zonefile/],
|
|
to_eol => [';']},
|
|
|
|
{languages => ['Algol 60'],
|
|
from_to => [[qw /comment ;/]]},
|
|
|
|
{languages => [qw {ALPACA B C C-- LPC PL/I}],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw /awk fvwm2 Icon m4 mutt Perl Python QML
|
|
R Ruby shell Tcl/],
|
|
to_eol => ['#']},
|
|
|
|
{languages => [[BASIC => 'mvEnterprise']],
|
|
to_eol => ['[*!]|REM']},
|
|
|
|
{languages => [qw /Befunge-98 Funge-98 Shelta/],
|
|
id => [';']},
|
|
|
|
{languages => ['beta-Juliet', 'Crystal Report', 'Portia', 'Ubercode'],
|
|
to_eol => ['//']},
|
|
|
|
{languages => ['BML'],
|
|
from_to => [['<?_c', '_c?>']],
|
|
},
|
|
|
|
{languages => [qw /C++/, 'C#', 'X++', qw /Cg ECMAScript FPL Java JavaScript/],
|
|
to_eol => ['//'],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw /CLU LaTeX slrn TeX/],
|
|
to_eol => ['%']},
|
|
|
|
{languages => [qw /False/],
|
|
from_to => [[qw !{ }!]]},
|
|
|
|
{languages => [qw /Fortran/],
|
|
to_eol => ['!']},
|
|
|
|
{languages => [qw /Haifu/],
|
|
id => [',']},
|
|
|
|
{languages => [qw /ILLGOL/],
|
|
to_eol => ['NB']},
|
|
|
|
{languages => [qw /INTERCAL/],
|
|
to_eol => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]},
|
|
|
|
{languages => [qw /J/],
|
|
to_eol => ['NB[.]']},
|
|
|
|
{languages => [qw /JavaDoc/],
|
|
from_to => [[qw {/** */}]]},
|
|
|
|
{languages => [qw /Nickle/],
|
|
to_eol => ['#'],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw /Oberon/],
|
|
from_to => [[qw /(* *)/]]},
|
|
|
|
{languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]],
|
|
to_eol => ['//'],
|
|
from_to => [[qw !{ }!], [qw !(* *)!]]},
|
|
|
|
{languages => [[qw /Pascal Workshop/]],
|
|
id => [qw /"/],
|
|
from_to => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]},
|
|
|
|
{languages => [qw /PEARL/],
|
|
to_eol => ['!'],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw /PHP/],
|
|
to_eol => ['#', '//'],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw !PL/B!],
|
|
to_eol => ['[.;]']},
|
|
|
|
{languages => [qw !PL/SQL!],
|
|
to_eol => ['--'],
|
|
from_to => [[qw {/* */}]]},
|
|
|
|
{languages => [qw /Q-BAL/],
|
|
to_eol => ['`']},
|
|
|
|
{languages => [qw /Smalltalk/],
|
|
id => ['"']},
|
|
|
|
{languages => [qw /SQL/],
|
|
to_eol => ['-{2,}']},
|
|
|
|
{languages => [qw /troff/],
|
|
to_eol => ['\\\"']},
|
|
|
|
{languages => [qw /vi/],
|
|
to_eol => ['"']},
|
|
|
|
{languages => [qw /*W/],
|
|
from_to => [[qw {|| !!}]]},
|
|
|
|
{languages => [qw /ZZT-OOP/],
|
|
to_eol => ["'"]},
|
|
);
|
|
|
|
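#
# Languages whose block comments may nest:
#   [name => line comment marker (or undef), block opener => block closer]
#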
my @plain_or_nested = (
|
|
[Caml => undef, "(*" => "*)"],
|
|
[Dylan => "//", "/*" => "*/"],
|
|
[Haskell => "-{2,}", "{-" => "-}"],
|
|
[Hugo => "!(?!\\\\)", "!\\" => "\\!"],
|
|
[SLIDE => "#", "(*" => "*)"],
|
|
['Modula-2' => undef, "(*" => "*)"],
|
|
['Modula-3' => undef, "(*" => "*)"],
|
|
);
|
|
|
|
#
|
|
# Helper subs.
|
|
#
|
|
|
|
sub combine {
|
|
local $_ = join "|", @_;
|
|
if (@_ > 1) {
|
|
s/\(\?k:/(?:/g;
|
|
$_ = "(?k:$_)";
|
|
}
|
|
$_
|
|
}
|
|
|
|
sub to_eol ($) {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"}
|
|
sub id ($) {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"} # One char only!
|
|
sub from_to {
|
|
my ($begin, $end) = @_;
|
|
|
|
my $qb = quotemeta $begin;
|
|
my $qe = quotemeta $end;
|
|
my $fe = quotemeta substr $end => 0, 1;
|
|
my $te = quotemeta substr $end => 1;
|
|
|
|
"(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))";
|
|
}
|
|
|
|
|
|
my $count = 0;
|
|
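# Build a pattern for comments delimited by $begin and $end that may nest.
# The pattern recurses through a postponed subexpression stored in
# @Regexp::Common::comment, indexed by $count.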
sub nested {
|
|
my ($begin, $end) = @_;
|
|
|
|
$count ++;
|
|
my $r = '(??{$Regexp::Common::comment ['. $count . ']})';
|
|
|
|
my $qb = quotemeta $begin;
|
|
my $qe = quotemeta $end;
|
|
my $fb = quotemeta substr $begin => 0, 1;
|
|
my $fe = quotemeta substr $end => 0, 1;
|
|
|
|
my $tb = quotemeta substr $begin => 1;
|
|
my $te = quotemeta substr $end => 1;
|
|
|
|
use re 'eval';
|
|
|
|
my $re;
|
|
if ($fb eq $fe) {
|
|
$re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/;
|
|
}
|
|
else {
|
|
local $" = "|";
|
|
my @clauses = "(?>[^$fb$fe]+)";
|
|
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
push @clauses => "$fe(?!$te)" if length $te;
|
|
push @clauses => $r;
|
|
$re = qr /(?:$qb(?:@clauses)*$qe)/;
|
|
}
|
|
|
|
$Regexp::Common::comment [$count] = qr/$re/;
|
|
}
|
|
|
|
#
|
|
# Process data.
|
|
#
|
|
|
|
foreach my $info (@plain_or_nested) {
|
|
my ($language, $mark, $begin, $end) = @$info;
|
|
pattern name => [comment => $language],
|
|
create =>
|
|
sub {my $re = nested $begin => $end;
|
|
my $prefix = defined $mark ? $mark . "[^\n]*\n|" : "";
|
|
exists $_ [1] -> {-keep} ? qr /($prefix$re)/
|
|
: qr /$prefix$re/
|
|
},
|
|
;
|
|
}
|
|
|
|
|
|
foreach my $group (@generic) {
|
|
my $pattern = combine +(map {to_eol $_} @{$group -> {to_eol}}),
|
|
(map {from_to @$_} @{$group -> {from_to}}),
|
|
(map {id $_} @{$group -> {id}}),
|
|
;
|
|
foreach my $language (@{$group -> {languages}}) {
|
|
pattern name => [comment => ref $language ? @$language : $language],
|
|
create => $pattern,
|
|
;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#
|
|
# Other languages.
|
|
#
|
|
|
|
# http://www.pascal-central.com/docs/iso10206.txt
|
|
pattern name => [qw /comment Pascal/],
|
|
create => '(?k:' . '(?k:[{]|[(][*])'
|
|
. '(?k:[^}*]*(?:[*](?![)])[^}*]*)*)'
|
|
. '(?k:[}]|[*][)])'
|
|
. ')'
|
|
;
|
|
|
|
# http://www.templetons.com/brad/alice/language/
|
|
pattern name => [qw /comment Pascal Alice/],
|
|
create => '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))'
|
|
;
|
|
|
|
|
|
# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt
|
|
pattern name => [qw (comment), 'Algol 68'],
|
|
create => q {(?k:(?:#[^#]*#)|} .
|
|
q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} .
|
|
q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))}
|
|
;
|
|
|
|
|
|
# See rules 91 and 92 of ISO 8879 (SGML).
|
|
# Charles F. Goldfarb: "The SGML Handbook".
|
|
# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9.
|
|
# Ch. 10.3, pp 390.
|
|
pattern name => [qw (comment HTML)],
|
|
create => q {(?k:(?k:<!)(?k:(?:--(?k:[^-]*(?:-[^-]+)*)--\s*)*)(?k:>))},
|
|
;
|
|
|
|
|
|
pattern name => [qw /comment SQL MySQL/],
|
|
create => q {(?k:(?:#|-- )[^\n]*\n|} .
|
|
q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))},
|
|
;
|
|
|
|
# Anything that isn't <>[]+-.,
|
|
# http://home.wxs.nl/~faase009/Ha_BF.html
|
|
pattern name => [qw /comment Brainfuck/],
|
|
create => '(?k:[^<>\[\]+\-.,]+)'
|
|
;
|
|
|
|
# Squeak is a variant of Smalltalk-80.
|
|
# http://www.squeak.
|
|
# http://mucow.com/squeak-qref.html
|
|
pattern name => [qw /comment Squeak/],
|
|
create => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))'
|
|
;
|
|
|
|
#
|
|
# Scores of less than 5 or above 17....
|
|
# http://www.cliff.biffle.org/esoterica/beatnik.html
|
|
@Regexp::Common::comment::scores = (1, 3, 3, 2, 1, 4, 2, 4, 1, 8,
|
|
5, 1, 3, 1, 1, 3, 10, 1, 1, 1,
|
|
1, 4, 4, 8, 4, 10);
|
|
{
|
|
my ($s, $x);
|
|
pattern name => [qw /comment Beatnik/],
|
|
create => sub {
|
|
use re 'eval';
|
|
my $re = qr {\b([A-Za-z]+)\b
|
|
(?(?{($s, $x) = (0, lc $^N);
|
|
$s += $Regexp::Common::comment::scores
|
|
[ord (chop $x) - ord ('a')] while length $x;
|
|
$s >= 5 && $s < 18})XXX|)}x;
|
|
$re;
|
|
},
|
|
;
|
|
}
|
|
|
|
|
|
# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/
|
|
# (Goto table of contents/3.3 Source Form)
|
|
# Fortran, in fixed format. Comments start with a C, c or * in the first
|
|
# column, or a ! anywhere except in the sixth column. They end with a newline.
|
|
pattern name => [qw /comment Fortran fixed/],
|
|
create => '(?k:(?k:(?:^[Cc*]|(?<!^.....)!))(?k:[^\n]*)(?k:\n))'
|
|
;
|
|
|
|
|
|
# http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm
|
|
# Traditionally, comments in COBOL were indicated with an asterisk in
|
|
# the seventh column. Modern compilers may be more lenient.
|
|
pattern name => [qw /comment COBOL/],
|
|
create => '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))',
|
|
;
|
|
|
|
1;
|
|
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
Regexp::Common::comment -- provide regexes for comments.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Regexp::Common qw /comment/;
|
|
|
|
while (<>) {
|
|
/$RE{comment}{C}/ and print "Contains a C comment\n";
|
|
/$RE{comment}{C++}/ and print "Contains a C++ comment\n";
|
|
/$RE{comment}{PHP}/ and print "Contains a PHP comment\n";
|
|
/$RE{comment}{Java}/ and print "Contains a Java comment\n";
|
|
/$RE{comment}{Perl}/ and print "Contains a Perl comment\n";
|
|
/$RE{comment}{awk}/ and print "Contains an awk comment\n";
|
|
/$RE{comment}{HTML}/ and print "Contains an HTML comment\n";
|
|
}
|
|
|
|
use Regexp::Common qw /comment RE_comment_HTML/;
|
|
|
|
while (<>) {
|
|
$_ =~ RE_comment_HTML() and print "Contains an HTML comment\n";
|
|
}
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Please consult the manual of L<Regexp::Common> for a general description
|
|
of the works of this interface.
|
|
|
|
Do not use this module directly, but load it via I<Regexp::Common>.
|
|
|
|
This module gives you regular expressions for comments in various
|
|
languages.
|
|
|
|
=head2 THE LANGUAGES
|
|
|
|
Below, the comments of each of the languages are described.
|
|
The patterns are available as C<$RE{comment}{I<LANG>}>, for each
|
|
language I<LANG>. Some languages have variants; the entry for each such
|
|
language describes how to get the patterns for its variants.
|
|
Unless mentioned otherwise,
|
|
C<{-keep}> sets C<$1>, C<$2>, C<$3> and C<$4> to the entire comment,
|
|
the opening marker, the content of the comment, and the closing marker
|
|
(for many languages, the latter is a newline) respectively.
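
For example, a minimal sketch of the captures for a C-style comment
(the input string is invented for illustration):

    use Regexp::Common qw /comment/;

    if ("x = 1; /* counter */" =~ /$RE{comment}{C}{-keep}/) {
        print "whole: $1\n";    # "/* counter */"
        print "open:  $2\n";    # "/*"
        print "body:  $3\n";    # " counter "
        print "close: $4\n";    # "*/"
    }
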
|
|
|
|
=over 4
|
|
|
|
=item ABC
|
|
|
|
Comments in I<ABC> start with a backslash (C<\>), and last till
|
|
the end of the line.
|
|
See L<http://homepages.cwi.nl/%7Esteven/abc/>.
|
|
|
|
=item Ada
|
|
|
|
Comments in I<Ada> start with C<-->, and last till the end of the line.
|
|
|
|
=item Advisor
|
|
|
|
I<Advisor> is a language used by the HP product I<glance>. Comments for
|
|
this language start with either C<#> or C<//>, and last till the
|
|
end of the line.
|
|
|
|
=item Advsys
|
|
|
|
Comments for the I<Advsys> language start with C<;> and last till
|
|
the end of the line. See also L<http://www.wurb.com/if/devsys/12>.
|
|
|
|
=item Alan
|
|
|
|
I<Alan> comments start with C<-->, and last till the end of the line.
|
|
See also L<http://w1.132.telia.com/~u13207378/alan/manual/alanTOC.html>.
|
|
|
|
=item Algol 60
|
|
|
|
Comments in the I<Algol 60> language start with the keyword C<comment>,
|
|
and end with a C<;>. See L<http://www.masswerk.at/algol60/report.htm>.
|
|
|
|
=item Algol 68
|
|
|
|
In I<Algol 68>, comments are either delimited by C<#>, or by one of the
|
|
keywords C<co> or C<comment>. The keywords should not be part of another
|
|
word. See L<http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt>.
|
|
With C<{-keep}>, only C<$1> will be set, returning the entire comment.
|
|
|
|
=item ALPACA
|
|
|
|
The I<ALPACA> language has comments starting with C</*> and ending with C<*/>.
|
|
|
|
=item awk
|
|
|
|
The I<awk> programming language uses comments that start with C<#>
|
|
and end at the end of the line.
|
|
|
|
=item B
|
|
|
|
The I<B> language has comments starting with C</*> and ending with C<*/>.
|
|
|
|
=item BASIC
|
|
|
|
There are various forms of BASIC around. Currently, we only support the
|
|
variant supported by I<mvEnterprise>, whose pattern is available as
|
|
C<$RE{comment}{BASIC}{mvEnterprise}>. Comments in this language start with a
|
|
C<!>, a C<*> or the keyword C<REM>, and end till the end of the line. See
|
|
L<http://www.rainingdata.com/products/beta/docs/mve/50/ReferenceManual/Basic.pdf>.
|
|
|
|
=item Beatnik
|
|
|
|
The esoteric language I<Beatnik> only uses words consisting of letters.
|
|
Words are scored according to the rules of Scrabble. Words scoring less
|
|
than 5 points, or 18 points or more are considered comments (although
|
|
the compiler might mock at you if you score less than 5 points).
|
|
Regardless of whether C<{-keep}> is used, C<$1> will be set, and set to the
|
|
entire comment. This pattern requires I<perl 5.8.0> or newer.
|
|
|
|
=item beta-Juliet
|
|
|
|
The I<beta-Juliet> programming language has comments that start with
|
|
C<//> and that continue till the end of the line. See also
|
|
L<http://www.catseye.mb.ca/esoteric/b-juliet/index.html>.
|
|
|
|
=item Befunge-98
|
|
|
|
The esoteric language I<Befunge-98> uses comments that start and end
|
|
with a C<;>. See L<http://www.catseye.mb.ca/esoteric/befunge/98/spec98.html>.
|
|
|
|
=item BML
|
|
|
|
I<BML>, or I<Better Markup Language> is an HTML templating language that
|
|
uses comments starting with C<< <?_c >>, and ending with C<< _c?> >>.
|
|
See L<http://www.livejournal.com/doc/server/bml.index.html>.
|
|
|
|
=item Brainfuck
|
|
|
|
The minimal language I<Brainfuck> uses only eight characters,
|
|
C<E<lt>>, C<E<gt>>, C<[>, C<]>, C<+>, C<->, C<.> and C<,>.
|
|
Any other characters are considered comments. With C<{-keep}>,
|
|
C<$1> is set to the entire comment.
|
|
|
|
=item C
|
|
|
|
The I<C> language has comments starting with C</*> and ending with C<*/>.
|
|
|
|
=item C--
|
|
|
|
The I<C--> language has comments starting with C</*> and ending with C<*/>.
|
|
See L<http://cs.uas.arizona.edu/classes/453/programs/C--Spec.html>.
|
|
|
|
=item C++
|
|
|
|
The I<C++> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment.
|
|
|
|
=item C#
|
|
|
|
The I<C#> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment.
|
|
See L<http://msdn.microsoft.com/library/default.asp?url=/library/en-us/csspec/html/vclrfcsharpspec_C.asp>.
|
|
|
|
=item Caml
|
|
|
|
Comments in I<Caml> start with C<(*>, end with C<*)>, and can be nested.
|
|
See L<http://www.cs.caltech.edu/courses/cs134/cs134b/book.pdf> and
|
|
L<http://pauillac.inria.fr/caml/index-eng.html>.
|
|
|
|
=item Cg
|
|
|
|
The I<Cg> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment.
|
|
See L<http://developer.nvidia.com/attach/3722>.
|
|
|
|
=item CLU
|
|
|
|
In C<CLU>, a comment starts with a percent sign (C<%>), and ends with the
|
|
next newline. See L<ftp://ftp.lcs.mit.edu:/pub/pclu/CLU-syntax.ps> and
|
|
L<http://www.pmg.lcs.mit.edu/CLU.html>.
|
|
|
|
=item COBOL
|
|
|
|
Traditionally, comments in I<COBOL> are indicated by an asterisk in the
|
|
seventh column. This is what the pattern matches. Modern compilers may be
|
|
more lenient, though. See L<http://www.csis.ul.ie/cobol/Course/COBOLIntro.htm>,
|
|
and L<http://www.csis.ul.ie/cobol/default.htm>.
|
|
|
|
=item CQL
|
|
|
|
Comments in the chess query language (I<CQL>) start with a semi colon
|
|
(C<;>) and last till the end of the line. See L<http://www.rbnn.com/cql/>.
|
|
|
|
=item Crystal Report
|
|
|
|
The formula editor in I<Crystal Reports> uses comments that start
|
|
with C<//>, and end with the end of the line.
|
|
|
|
=item Dylan
|
|
|
|
There are two types of comments in I<Dylan>. They either start with
|
|
C<//>, or are nested comments, delimited with C</*> and C<*/>.
|
|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
|
|
This pattern requires I<perl 5.6.0> or newer.
|
|
|
|
=item ECMAScript
|
|
|
|
The I<ECMAScript> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment. I<JavaScript> is Netscape's implementation
|
|
of I<ECMAScript>. See
|
|
L<http://www.ecma-international.org/publications/files/ecma-st/Ecma-262.pdf>,
|
|
and L<http://www.ecma-international.org/publications/standards/Ecma-262.htm>.
|
|
|
|
=item Eiffel
|
|
|
|
I<Eiffel> comments start with C<-->, and last till the end of the line.
|
|
|
|
=item False
|
|
|
|
In I<False>, comments start with C<{> and end with C<}>.
|
|
See L<http://wouter.fov120.com/false/false.txt>
|
|
|
|
=item FPL
|
|
|
|
The I<FPL> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment.
|
|
|
|
=item Forth
|
|
|
|
Comments in Forth start with C<\>, and end with the end of the line.
|
|
See also L<http://docs.sun.com/sb/doc/806-1377-10>.
|
|
|
|
=item Fortran
|
|
|
|
There are two forms of I<Fortran>. There's free form I<Fortran>, which
|
|
has comments that start with C<!>, and end at the end of the line.
|
|
The pattern for this is given by C<$RE{comment}{Fortran}>. Fixed form I<Fortran>,
|
|
which has been obsoleted, has comments that start with C<C>, C<c> or
|
|
C<*> in the first column, or with C<!> anywhere except in the sixth column.
|
|
The pattern for this is given by C<$RE{comment}{Fortran}{fixed}>.
|
|
|
|
See also L<http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/>.
|
|
|
|
=item Funge-98
|
|
|
|
The esoteric language I<Funge-98> uses comments that start and end with
|
|
a C<;>.
|
|
|
|
=item fvwm2
|
|
|
|
Configuration files for I<fvwm2> have comments starting with a
|
|
C<#> and lasting the rest of the line.
|
|
|
|
=item Haifu
|
|
|
|
I<Haifu>, an esoteric language using haikus, has comments starting and
|
|
ending with a C<,>.
|
|
See L<http://www.dangermouse.net/esoteric/haifu.html>.
|
|
|
|
=item Haskell
|
|
|
|
There are two types of comments in I<Haskell>. They either start with
|
|
at least two dashes, or are nested comments, delimited with C<{-> and C<-}>.
|
|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
|
|
This pattern requires I<perl 5.6.0> or newer.
|
|
|
|
=item HTML
|
|
|
|
In I<HTML>, comments only appear inside a I<comment declaration>.
|
|
A comment declaration starts with a C<E<lt>!>, and ends with a
|
|
C<E<gt>>. Inside this declaration, we have zero or more comments.
|
|
Comments start with C<--> and end with C<-->, and are optionally
|
|
followed by whitespace. The pattern C<$RE{comment}{HTML}> recognizes
|
|
those comment declarations (and hence more than a comment).
|
|
Note that this is not the same as something that starts with
|
|
C<E<lt>!--> and ends with C<--E<gt>>, because the following will
|
|
be matched completely:
|
|
|
|
<!-- First Comment --
|
|
--> Second Comment <!--
|
|
-- Third Comment -->
|
|
|
|
Do not be fooled by what your favourite browser thinks is an HTML
|
|
comment.
|
|
|
|
If C<{-keep}> is used, the following are returned:
|
|
|
|
=over 4
|
|
|
|
=item $1
|
|
|
|
captures the entire comment declaration.
|
|
|
|
=item $2
|
|
|
|
captures the MDO (markup declaration open), C<E<lt>!>.
|
|
|
|
=item $3
|
|
|
|
captures the content between the MDO and the MDC.
|
|
|
|
=item $4
|
|
|
|
captures the (last) comment, without the surrounding dashes.
|
|
|
|
=item $5
|
|
|
|
captures the MDC (markup declaration close), C<E<gt>>.
|
|
|
|
=back
|
|
|
|
=item Hugo
|
|
|
|
There are two types of comments in I<Hugo>. They either start with
|
|
C<!> (which cannot be followed by a C<\>), or are nested comments,
|
|
delimited with C<!\> and C<\!>.
|
|
Under C<{-keep}>, only C<$1> will be set, returning the entire comment.
|
|
This pattern requires I<perl 5.6.0> or newer.
|
|
|
|
=item Icon
|
|
|
|
I<Icon> has comments that start with C<#> and end at the next new line.
|
|
See L<http://www.toolsofcomputing.com/IconHandbook/IconHandbook.pdf>,
|
|
L<http://www.cs.arizona.edu/icon/index.htm>, and
|
|
L<http://burks.bton.ac.uk/burks/language/icon/index.htm>.
|
|
|
|
=item ILLGOL
|
|
|
|
The esoteric language I<ILLGOL> uses comments starting with I<NB> and lasting
|
|
till the end of the line.
|
|
See L<http://www.catseye.mb.ca/esoteric/illgol/index.html>.
|
|
|
|
=item INTERCAL
|
|
|
|
Comments in INTERCAL are single line comments. They start with one of
|
|
the keywords C<NOT> or C<N'T>, and can optionally be preceded by the
|
|
keywords C<DO> and C<PLEASE>. If both keywords are used, C<PLEASE>
|
|
precedes C<DO>. Keywords are separated by whitespace.
|
|
|
|
=item J
|
|
|
|
The language I<J> uses comments that start with C<NB.>, and that last till
|
|
the end of the line. See
|
|
L<http://www.jsoftware.com/books/help/primer/contents.htm>, and
|
|
L<http://www.jsoftware.com/>.
|
|
|
|
=item Java
|
|
|
|
The I<Java> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment.
|
|
|
|
=item JavaDoc
|
|
|
|
The I<Javadoc> documentation syntax is demarked with a subset of
|
|
ordinary Java comments to separate it from code. Comments start with
|
|
C</**> and end with C<*/>. If C<{-keep}> is used, only C<$1> will be set,
|
|
and set to the entire comment. See
|
|
L<http://www.oracle.com/technetwork/java/javase/documentation/index-137868.html#format>.
|
|
|
|
=item JavaScript
|
|
|
|
The I<JavaScript> language has two forms of comments. Comments that start with
|
|
C<//> and last till the end of the line, and comments that start with
|
|
C</*>, and end with C<*/>. If C<{-keep}> is used, only C<$1> will be
|
|
set, and set to the entire comment. I<JavaScript> is Netscape's implementation
|
|
of I<ECMAScript>.
|
|
See L<http://www.mozilla.org/js/language/E262-3.pdf>,
|
|
and L<http://www.mozilla.org/js/language/>.
|
|
|
|
=item LaTeX
|
|
|
|
The documentation language I<LaTeX> uses comments starting with C<%>
|
|
and ending at the end of the line.
|
|
|
|
=item Lisp
|
|
|
|
Comments in I<Lisp> start with a semi-colon (C<;>) and last till the
|
|
end of the line.
|
|
|
|
=item LPC
|
|
|
|
The I<LPC> language has comments starting with C</*> and ending with C<*/>.
|
|
|
|
=item LOGO
|
|
|
|
Comments for the language I<LOGO> start with C<;>, and last till the end
|
|
of the line.
|
|
|
|
=item lua
|
|
|
|
Comments for the I<lua> language start with C<-->, and last till the end
|
|
of the line. See also L<http://www.lua.org/manual/manual.html>.
|
|
|
|
=item M, MUMPS
|
|
|
|
In C<M> (aka C<MUMPS>), comments start with a semi-colon, and last
|
|
till the end of a line. The language specification requires the
|
|
semi-colon to be preceded by one or more I<linestart character>s.
|
|
Those characters default to a space, but that's configurable. This
|
|
requirement, of preceding the comment with linestart characters, is
|
|
B<not> tested for. See
|
|
L<ftp://ftp.intersys.com/pub/openm/ism/ism64docs.zip>,
|
|
L<http://mtechnology.intersys.com/mproducts/openm/index.html>, and
|
|
L<http://mcenter.com/mtrc/index.html>.
|
|
|
|
=item m4
|
|
|
|
By default, the preprocessor language I<m4> uses single line comments,
|
|
that start with a C<#> and continue to the end of the line, including
|
|
the newline. The pattern C<$RE {comment} {m4}> matches such comments.
|
|
In I<m4>, it is possible to change the starting token though.
|
|
See L<http://wolfram.schneider.org/bsd/7thEdManVol2/m4/m4.pdf>,
|
|
L<http://www.cs.stir.ac.uk/~kjt/research/pdf/expl-m4.pdf>, and
|
|
L<http://www.gnu.org/software/m4/manual/>.
|
|
|
|
=item Modula-2
|
|
|
|
In C<Modula-2>, comments start with C<(*>, and end with C<*)>. Comments
|
|
may be nested. See L<http://www.modula2.org/>.
|
|
|
|
=item Modula-3
|
|
|
|
In C<Modula-3>, comments start with C<(*>, and end with C<*)>. Comments
|
|
may be nested. See L<http://www.m3.org/>.
|
|
|
|
=item mutt
|
|
|
|
Configuration files for I<mutt> have comments starting with a
|
|
C<#> and lasting the rest of the line.
|
|
|
|
=item Nickle
|
|
|
|
The I<Nickle> language has one line comments starting with C<#>
|
|
(like Perl), or multiline comments delimited by C</*> and C<*/>
|
|
(like C). Under C<-keep>, only C<$1> will be set. See also
|
|
L<http://www.nickle.org>.
|
|
|
|
=item Oberon
|
|
|
|
Comments in I<Oberon> start with C<(*> and end with C<*)>.
|
|
See L<http://www.oberon.ethz.ch/oreport.html>.
|
|
|
|
=item Pascal
|
|
|
|
There are many implementations of Pascal. This module provides
|
|
patterns for comments of several implementations.
|
|
|
|
=over 4
|
|
|
|
=item C<$RE{comment}{Pascal}>
|
|
|
|
This is the pattern that recognizes comments according to the Pascal ISO
|
|
standard. This standard says that comments start with either C<{>, or
|
|
C<(*>, and end with C<}> or C<*)>. This means that C<{*)> and C<(*}>
|
|
are considered to be comments. Many Pascal applications don't allow this.
|
|
See L<http://www.pascal-central.com/docs/iso10206.txt>
|
|
|
|
=item C<$RE{comment}{Pascal}{Alice}>
|
|
|
|
The I<Alice Pascal> compiler accepts comments that start with C<{>
|
|
and end with C<}>. Comments are not allowed to contain newlines.
|
|
See L<http://www.templetons.com/brad/alice/language/>.
|
|
|
|
=item C<$RE{comment}{Pascal}{Delphi}>, C<$RE{comment}{Pascal}{Free}>
|
|
and C<$RE{comment}{Pascal}{GPC}>
|
|
|
|
The I<Delphi Pascal>, I<Free Pascal> and the I<Gnu Pascal Compiler>
|
|
implementations of Pascal all have comments that either start with
|
|
C<//> and last till the end of the line, are delimited with C<{>
|
|
and C<}> or are delimited with C<(*> and C<*)>. Patterns for those
|
|
comments are given by C<$RE{comment}{Pascal}{Delphi}>,
|
|
C<$RE{comment}{Pascal}{Free}> and C<$RE{comment}{Pascal}{GPC}>
|
|
respectively. These patterns only set C<$1> when C<{-keep}> is used,
|
|
which will then include the entire comment.
|
|
|
|
See L<http://info.borland.com/techpubs/delphi5/oplg/>,
|
|
L<http://www.freepascal.org/docs-html/ref/ref.html> and
|
|
L<http://www.gnu-pascal.de/gpc/>.
|
|
|
|
=item C<$RE{comment}{Pascal}{Workshop}>
|
|
|
|
The I<Workshop Pascal> compiler, from SUN Microsystems, allows comments
|
|
that are delimited with either C<{> and C<}>, delimited with
|
|
C<(*> and C<*)>, delimited with C</*> and C<*/>, or starting
|
|
and ending with a double quote (C<">). When C<{-keep}> is used,
|
|
only C<$1> is set, and returns the entire comment.
|
|
|
|
See L<http://docs.sun.com/db/doc/802-5762>.
|
|
|
|
=back
|
|
|
|
=item PEARL
|
|
|
|
Comments in I<PEARL> start with a C<!> and last till the end of the
|
|
line, or start with C</*> and end with C<*/>. With C<{-keep}>,
|
|
C<$1> will be set to the entire comment.
|
|
|
|
=item PHP
|
|
|
|
Comments in I<PHP> start with either C<#> or C<//> and last till the
|
|
end of the line, or are delimited by C</*> and C<*/>. With C<{-keep}>,
|
|
C<$1> will be set to the entire comment.
|
|
|
|
=item PL/B
|
|
|
|
In I<PL/B>, comments start with either C<.> or C<;>, and end with the
|
|
next newline. See L<http://www.mmcctech.com/pl-b/plb-0010.htm>.
|
|
|
|
=item PL/I
|
|
|
|
The I<PL/I> language has comments starting with C</*> and ending with C<*/>.
|
|
|
|
=item PL/SQL
|
|
|
|
In I<PL/SQL>, comments either start with C<--> and run till the end
|
|
of the line, or start with C</*> and end with C<*/>.
|
|
|
|
=item Perl
|
|
|
|
I<Perl> uses comments that start with a C<#>, and continue till the end
|
|
of the line.
|
|
|
|
=item Portia
|
|
|
|
The I<Portia> programming language has comments that start with C<//>,
|
|
and last till the end of the line.
|
|
|
|
=item Python
|
|
|
|
I<Python> uses comments that start with a C<#>, and continue till the end
|
|
of the line.
|
|
|
|
=item Q-BAL
|
|
|
|
Comments in the I<Q-BAL> language start with C<`> (a backtick), and
|
|
continue till the end of the line.
|
|
|
|
=item QML
|
|
|
|
In C<QML>, comments start with C<#> and last till the end of the line.
|
|
See L<http://www.questionmark.com/uk/qml/overview.doc>.
|
|
|
|
=item R
|
|
|
|
The statistical language I<R> uses comments that start with a C<#> and
|
|
end with the following new line. See L<http://www.r-project.org/>.
|
|
|
|
=item REBOL
|
|
|
|
Comments for the I<REBOL> language start with C<;> and last till the
|
|
end of the line.
|
|
|
|
=item Ruby
|
|
|
|
Comments in I<Ruby> start with C<#> and last till the end of the line.
|
|
|
|
=item Scheme
|
|
|
|
I<Scheme> comments start with C<;>, and last till the end of the line.
|
|
See L<http://schemers.org/>.
|
|
|
|
=item shell
|
|
|
|
Comments in various I<shell>s start with a C<#> and end at the end of
|
|
the line.
|
|
|
|
=item Shelta
|
|
|
|
The esoteric language I<Shelta> uses comments that start and end with
|
|
a C<;>. See L<http://www.catseye.mb.ca/esoteric/shelta/index.html>.
|
|
|
|
=item SLIDE
|
|
|
|
The I<SLIDE> language has two forms of comments. First there is the
|
|
line comment, which starts with a C<#> and includes the rest of the
|
|
line (just like Perl). Second, there is the multiline, nested comment,
|
|
which are delimited by C<(*> and C<*)>. Under C<{-keep}>, only
|
|
C<$1> is set, and is set to the entire comment. See
|
|
L<http://www.cs.berkeley.edu/~ug/slide/docs/slide/spec/spec_frame_intro.shtml>.
|
|
|
|
=item slrn
|
|
|
|
Configuration files for I<slrn> have comments starting with a
|
|
C<%> and lasting the rest of the line.
|
|
|
|
=item Smalltalk
|
|
|
|
I<Smalltalk> uses comments that start and end with a double quote, C<">.
|
|
|
|
=item SMITH
|
|
|
|
Comments in the I<SMITH> language start with C<;>, and last till the
|
|
end of the line.
|
|
|
|
=item Squeak
|
|
|
|
In the Smalltalk variant I<Squeak>, comments start and end with
|
|
C<">. Double quotes can appear inside comments by doubling them.
|
|
|
|
=item SQL
|
|
|
|
Standard I<SQL> uses comments starting with two or more dashes, and
|
|
ending at the end of the line.
|
|
|
|
I<MySQL> does not follow the standard. Instead, it allows comments
|
|
that start with a C<#> or C<-- > (that's two dashes and a space)
|
|
ending with the following newline, and comments starting with
|
|
C</*>, and ending with the next C<;> or C<*/> that isn't inside
|
|
single or double quotes. A pattern for this is returned by
|
|
C<$RE{comment}{SQL}{MySQL}>. With C<{-keep}>, only C<$1> will
|
|
be set, and it returns the entire comment.
|
|
|
|
=item Tcl
|
|
|
|
In I<Tcl>, comments start with C<#> and continue till the end of the line.
|
|
|
|
=item TeX
|
|
|
|
The documentation language I<TeX> uses comments starting with C<%>
|
|
and ending at the end of the line.
|
|
|
|
=item troff
|
|
|
|
The document formatting language I<troff> uses comments starting
|
|
with C<\">, and continuing till the end of the line.
|
|
|
|
=item Ubercode
|
|
|
|
The Windows programming language I<Ubercode> uses comments that start with
|
|
C<//> and continue to the end of the line. See L<http://www.ubercode.com>.
|
|
|
|
=item vi
|
|
|
|
In configuration files for the editor I<vi>, one can use comments
|
|
starting with C<">, and ending at the end of the line.
|
|
|
|
=item *W
|
|
|
|
In the language I<*W>, comments start with C<||>, and end with C<!!>.
|
|
|
|
=item zonefile
|
|
|
|
Comments in DNS I<zonefile>s start with C<;>, and continue till the
|
|
end of the line.
|
|
|
|
=item ZZT-OOP
|
|
|
|
The in-game language I<ZZT-OOP> uses comments that start with a C<'>
|
|
character, and end at the following newline. See
|
|
L<http://dave2.rocketjump.org/rad/zzthelp/lang.html>.
|
|
|
|
=back
|
|
|
|
=head1 REFERENCES
|
|
|
|
=over 4
|
|
|
|
=item B<[Go 90]>
|
|
|
|
Charles F. Goldfarb: I<The SGML Handbook>. Oxford: Oxford University
|
|
Press. B<1990>. ISBN 0-19-853737-9. Ch. 10.3, pp 390-391.
|
|
|
|
=back
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Regexp::Common> for a general description of how to use this interface.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Damian Conway (damian@conway.org)
|
|
|
|
=head1 MAINTENANCE
|
|
|
|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
|
|
|
|
=head1 BUGS AND IRRITATIONS
|
|
|
|
Bound to be plenty.
|
|
|
|
For a start, there are many common regexes missing.
|
|
Send them in to I<regexp-common@abigail.be>.
|
|
|
|
=head1 LICENSE and COPYRIGHT
|
|
|
|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
|
|
|
|
This module is free software, and may be used under any of the following
|
|
licenses:
|
|
|
|
1) The Perl Artistic License. See the file COPYRIGHT.AL.
|
|
2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
|
|
3) The BSD License. See the file COPYRIGHT.BSD.
|
|
4) The MIT License. See the file COPYRIGHT.MIT.
|
|
|
|
=cut
|
|
EOC
|
|
# 2}}}
|
|
$Regexp_Common_Contents{'Common/balanced'} = <<'EOB'; # {{{2
|
|
package Regexp::Common::balanced; {
|
|
|
|
use 5.10.0;
|
|
|
|
use strict;
|
|
use warnings;
|
|
no warnings 'syntax';
|
|
|
|
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
|
our $VERSION = '2017060201';
|
|
|
|
my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' );
|
|
my %cache;
|
|
|
|
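# Build (and cache) a pattern matching balanced text delimited by the
# alternatives in $start and $finish; nesting is handled with (?-1).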
sub nested {
|
|
my ($start, $finish) = @_;
|
|
|
|
return $cache {$start} {$finish} if exists $cache {$start} {$finish};
|
|
|
|
my @starts = map {s/\\(.)/$1/g; $_} grep {length}
|
|
$start =~ /([^|\\]+|\\.)+/gs;
|
|
my @finishes = map {s/\\(.)/$1/g; $_} grep {length}
|
|
$finish =~ /([^|\\]+|\\.)+/gs;
|
|
|
|
push @finishes => ($finishes [-1]) x (@starts - @finishes);
|
|
|
|
my @re;
|
|
local $" = "|";
|
|
foreach my $begin (@starts) {
|
|
my $end = shift @finishes;
|
|
|
|
my $qb = quotemeta $begin;
|
|
my $qe = quotemeta $end;
|
|
my $fb = quotemeta substr $begin => 0, 1;
|
|
my $fe = quotemeta substr $end => 0, 1;
|
|
|
|
my $tb = quotemeta substr $begin => 1;
|
|
my $te = quotemeta substr $end => 1;
|
|
|
|
my $add;
|
|
if ($fb eq $fe) {
|
|
push @re =>
|
|
qq /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|(?-1))*$qe)/;
|
|
}
|
|
else {
|
|
my @clauses = "(?>[^$fb$fe]+)";
|
|
push @clauses => "$fb(?!$tb)" if length $tb;
|
|
push @clauses => "$fe(?!$te)" if length $te;
|
|
push @clauses => "(?-1)";
|
|
push @re => qq /(?:$qb(?:@clauses)*$qe)/;
|
|
}
|
|
}
|
|
|
|
$cache {$start} {$finish} = qr /(@re)/;
|
|
}
|
|
|
|
|
|
pattern name => [qw /balanced -parens=() -begin= -end=/],
|
|
create => sub {
|
|
my $flag = $_[1];
|
|
unless (defined $flag -> {-begin} && length $flag -> {-begin} &&
|
|
defined $flag -> {-end} && length $flag -> {-end}) {
|
|
my @open = grep {index ($flag->{-parens}, $_) >= 0}
|
|
('[','(','{','<');
|
|
my @close = map {$closer {$_}} @open;
|
|
$flag -> {-begin} = join "|" => @open;
|
|
$flag -> {-end} = join "|" => @close;
|
|
}
|
|
return nested @$flag {qw /-begin -end/};
|
|
},
|
|
;
|
|
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
Regexp::Common::balanced -- provide regexes for strings with balanced
|
|
parenthesized delimiters or arbitrary delimiters.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Regexp::Common qw /balanced/;
|
|
|
|
while (<>) {
|
|
/$RE{balanced}{-parens=>'()'}/
|
|
and print q{balanced parentheses\n};
|
|
}
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Please consult the manual of L<Regexp::Common> for a general description
|
|
of the works of this interface.
|
|
|
|
Do not use this module directly, but load it via I<Regexp::Common>.
|
|
|
|
=head2 C<$RE{balanced}{-parens}>
|
|
|
|
Returns a pattern that matches a string that starts with the nominated
|
|
opening parenthesis or bracket, contains characters and properly nested
|
|
parenthesized subsequences, and ends in the matching parenthesis.
|
|
|
|
More than one type of parenthesis can be specified:
|
|
|
|
$RE{balanced}{-parens=>'(){}'}
|
|
|
|
in which case all specified parenthesis types must be correctly balanced within
|
|
the string.
|
|
|
|
Since version 2013030901, C<< $1 >> will always be set (to the entire
|
|
matched substring), regardless of whether C<< {-keep} >> is used or not.
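
For example (a minimal sketch; the input string is invented for
illustration):

    use Regexp::Common qw /balanced/;

    my $str = 'f(a, g(b, c)) + h';
    if ($str =~ /$RE{balanced}{-parens=>'()'}/) {
        print "balanced: $1\n";    # prints "(a, g(b, c))"
    }
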
|
|
|
|
=head2 C<< $RE{balanced}{-begin => "begin"}{-end => "end"} >>
|
|
|
|
Returns a pattern that matches a string that is properly balanced
|
|
using the I<begin> and I<end> strings as start and end delimiters.
|
|
Multiple sets of begin and end strings can be given by separating
|
|
them by C<|>s (which can be escaped with a backslash).
|
|
|
|
qr/$RE{balanced}{-begin => "do|if|case"}{-end => "done|fi|esac"}/
|
|
|
|
will match properly balanced strings that either start with I<do> and
|
|
end with I<done>, start with I<if> and end with I<fi>, or start with
|
|
I<case> and end with I<esac>.
|
|
|
|
If I<-end> contains fewer cases than I<-begin>, the last case of I<-end>
|
|
is repeated. If it contains more cases than I<-begin>, the extra cases
|
|
are ignored. If either of I<-begin> or I<-end> isn't given, or is empty,
|
|
I<< -begin => '(' >> and I<< -end => ')' >> are assumed.
|
|
|
|
Since version 2013030901, C<< $1 >> will always be set (to the entire
|
|
matched substring), regardless of whether C<< {-keep} >> is used or not.
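
A short usage sketch (the input string is made up for illustration):

    use Regexp::Common qw /balanced/;

    my $block = qr/$RE{balanced}{-begin => "do|if|case"}{-end => "done|fi|esac"}/;
    if ('if x then do y done fi' =~ $block) {
        print "balanced block: $1\n";    # the entire string
    }
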
|
|
|
|
=head2 Note
|
|
|
|
Since version 2013030901 the pattern will make use of the recursive construct
|
|
C<< (?-1) >>, instead of using the problematic C<< (??{ }) >> construct.
|
|
This fixes a problem that was introduced in the 5.17 development track.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Regexp::Common> for a general description of how to use this interface.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Damian Conway (damian@conway.org)
|
|
|
|
=head1 MAINTENANCE
|
|
|
|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
|
|
|
|
=head1 BUGS AND IRRITATIONS
|
|
|
|
Bound to be plenty.
|
|
|
|
For a start, there are many common regexes missing.
|
|
Send them in to I<regexp-common@abigail.be>.
|
|
|
|
=head1 LICENSE and COPYRIGHT
|
|
|
|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
|
|
|
|
This module is free software, and may be used under any of the following
|
|
licenses:
|
|
|
|
1) The Perl Artistic License. See the file COPYRIGHT.AL.
|
|
2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
|
|
3) The BSD License. See the file COPYRIGHT.BSD.
|
|
4) The MIT License. See the file COPYRIGHT.MIT.
|
|
|
|
=cut
|
|
EOB
|
|
# 2}}}
|
|
$Regexp_Common_Contents{'Common/delimited'} = <<'EOD'; # {{{2
|
|
package Regexp::Common::delimited;
|
|
|
|
use 5.10.0;
|
|
|
|
use strict;
|
|
use warnings;
|
|
no warnings 'syntax';
|
|
|
|
use Regexp::Common qw /pattern clean no_defaults/;
|
|
|
|
use charnames ':full';
|
|
|
|
our $VERSION = '2017060201';
|
|
|
|
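# Build one branch per delimiter character, honouring the corresponding
# escape and closing delimiter (if given), and combine the branches
# inside a branch-reset group, (?|...).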
sub gen_delimited {
|
|
|
|
my ($dels, $escs, $cdels) = @_;
|
|
# return '(?:\S*)' unless $dels =~ /\S/;
|
|
if (defined $escs && length $escs) {
|
|
$escs .= substr ($escs, -1) x (length ($dels) - length ($escs));
|
|
}
|
|
if (defined $cdels && length $cdels) {
|
|
$cdels .= substr ($cdels, -1) x (length ($dels) - length ($cdels));
|
|
}
|
|
else {
|
|
$cdels = $dels;
|
|
}
|
|
|
|
my @pat = ();
|
|
for (my $i = 0; $i < length $dels; $i ++) {
|
|
my $del = quotemeta substr ($dels, $i, 1);
|
|
my $cdel = quotemeta substr ($cdels, $i, 1);
|
|
my $esc = defined $escs && length ($escs)
|
|
? quotemeta substr ($escs, $i, 1) : "";
|
|
if ($cdel eq $esc) {
|
|
push @pat =>
|
|
"(?k:$del)(?k:[^$cdel]*(?:(?:$cdel$cdel)[^$cdel]*)*)(?k:$cdel)";
|
|
}
|
|
elsif (length $esc) {
|
|
push @pat =>
|
|
"(?k:$del)(?k:[^$esc$cdel]*(?:$esc.[^$esc$cdel]*)*)(?k:$cdel)";
|
|
}
|
|
else {
|
|
push @pat => "(?k:$del)(?k:[^$cdel]*)(?k:$cdel)";
|
|
}
|
|
}
|
|
my $pat = join '|', @pat;
|
|
return "(?k:(?|$pat))";
|
|
}
|
|
|
|
sub _croak {
|
|
require Carp;
|
|
goto &Carp::croak;
|
|
}
|
|
|
|
pattern name => [qw( delimited -delim= -esc=\\ -cdelim= )],
|
|
create => sub {my $flags = $_[1];
|
|
_croak 'Must specify delimiter in $RE{delimited}'
|
|
unless length $flags->{-delim};
|
|
return gen_delimited (@{$flags}{-delim, -esc, -cdelim});
|
|
},
|
|
;
|
|
|
|
pattern name => [qw( quoted -esc=\\ )],
|
|
create => sub {my $flags = $_[1];
|
|
return gen_delimited (q{"'`}, $flags -> {-esc});
|
|
},
|
|
;
|
|
|
|
|
|
my @bracket_pairs;
|
|
if ($] >= 5.014) {
|
|
#
|
|
# List from http://xahlee.info/comp/unicode_matching_brackets.html
|
|
#
|
|
@bracket_pairs =
|
|
map {ref $_ ? $_ :
|
|
/!/ ? [(do {my $x = $_; $x =~ s/!/TOP/; $x},
|
|
do {my $x = $_; $x =~ s/!/BOTTOM/; $x})]
|
|
: [(do {my $x = $_; $x =~ s/\?/LEFT/; $x},
|
|
do {my $x = $_; $x =~ s/\?/RIGHT/; $x})]}
|
|
"? PARENTHESIS",
|
|
"? SQUARE BRACKET",
|
|
"? CURLY BRACKET",
|
|
"? DOUBLE QUOTATION MARK",
|
|
"? SINGLE QUOTATION MARK",
|
|
"SINGLE ?-POINTING ANGLE QUOTATION MARK",
|
|
"?-POINTING DOUBLE ANGLE QUOTATION MARK",
|
|
"FULLWIDTH ? PARENTHESIS",
|
|
"FULLWIDTH ? SQUARE BRACKET",
|
|
"FULLWIDTH ? CURLY BRACKET",
|
|
"FULLWIDTH ? WHITE PARENTHESIS",
|
|
"? WHITE PARENTHESIS",
|
|
"? WHITE SQUARE BRACKET",
|
|
"? WHITE CURLY BRACKET",
|
|
"? CORNER BRACKET",
|
|
"? ANGLE BRACKET",
|
|
"? DOUBLE ANGLE BRACKET",
|
|
"? BLACK LENTICULAR BRACKET",
|
|
"? TORTOISE SHELL BRACKET",
|
|
"? BLACK TORTOISE SHELL BRACKET",
|
|
"? WHITE CORNER BRACKET",
|
|
"? WHITE LENTICULAR BRACKET",
|
|
"? WHITE TORTOISE SHELL BRACKET",
|
|
"HALFWIDTH ? CORNER BRACKET",
|
|
"MATHEMATICAL ? WHITE SQUARE BRACKET",
|
|
"MATHEMATICAL ? ANGLE BRACKET",
|
|
"MATHEMATICAL ? DOUBLE ANGLE BRACKET",
|
|
"MATHEMATICAL ? FLATTENED PARENTHESIS",
|
|
"MATHEMATICAL ? WHITE TORTOISE SHELL BRACKET",
|
|
"? CEILING",
|
|
"? FLOOR",
|
|
"Z NOTATION ? IMAGE BRACKET",
|
|
"Z NOTATION ? BINDING BRACKET",
|
|
[ "HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT",
|
|
"HEAVY SINGLE " . "COMMA QUOTATION MARK ORNAMENT", ],
|
|
[ "HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT",
|
|
"HEAVY DOUBLE " . "COMMA QUOTATION MARK ORNAMENT", ],
|
|
"MEDIUM ? PARENTHESIS ORNAMENT",
|
|
"MEDIUM FLATTENED ? PARENTHESIS ORNAMENT",
|
|
"MEDIUM ? CURLY BRACKET ORNAMENT",
|
|
"MEDIUM ?-POINTING ANGLE BRACKET ORNAMENT",
|
|
"HEAVY ?-POINTING ANGLE QUOTATION MARK ORNAMENT",
|
|
"HEAVY ?-POINTING ANGLE BRACKET ORNAMENT",
|
|
"LIGHT ? TORTOISE SHELL BRACKET ORNAMENT",
|
|
"ORNATE ? PARENTHESIS",
|
|
"! PARENTHESIS",
|
|
"! SQUARE BRACKET",
|
|
"! CURLY BRACKET",
|
|
"! TORTOISE SHELL BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? CORNER BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? WHITE CORNER BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? TORTOISE SHELL BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? BLACK LENTICULAR BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? WHITE LENTICULAR BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? ANGLE BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? DOUBLE ANGLE BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? SQUARE BRACKET",
|
|
"PRESENTATION FORM FOR VERTICAL ? CURLY BRACKET",
|
|
"?-POINTING ANGLE BRACKET",
|
|
"? ANGLE BRACKET WITH DOT",
|
|
"?-POINTING CURVED ANGLE BRACKET",
|
|
"SMALL ? PARENTHESIS",
|
|
"SMALL ? CURLY BRACKET",
|
|
"SMALL ? TORTOISE SHELL BRACKET",
|
|
"SUPERSCRIPT ? PARENTHESIS",
|
|
"SUBSCRIPT ? PARENTHESIS",
|
|
"? SQUARE BRACKET WITH UNDERBAR",
|
|
[ "LEFT SQUARE BRACKET WITH TICK IN TOP CORNER",
|
|
"RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER", ],
|
|
[ "LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER",
|
|
"RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER", ],
|
|
"? SQUARE BRACKET WITH QUILL",
|
|
"TOP ? HALF BRACKET",
|
|
"BOTTOM ? HALF BRACKET",
|
|
"? S-SHAPED BAG DELIMITER",
|
|
[ "LEFT ARC LESS-THAN BRACKET",
|
|
"RIGHT ARC GREATER-THAN BRACKET", ],
|
|
[ "DOUBLE LEFT ARC GREATER-THAN BRACKET",
|
|
"DOUBLE RIGHT ARC LESS-THAN BRACKET", ],
|
|
"? SIDEWAYS U BRACKET",
|
|
"? DOUBLE PARENTHESIS",
|
|
"? WIGGLY FENCE",
|
|
"? DOUBLE WIGGLY FENCE",
|
|
"? LOW PARAPHRASE BRACKET",
|
|
"? RAISED OMISSION BRACKET",
|
|
"? SUBSTITUTION BRACKET",
|
|
"? DOTTED SUBSTITUTION BRACKET",
|
|
"? TRANSPOSITION BRACKET",
|
|
[ "OGHAM FEATHER MARK",
|
|
"OGHAM REVERSED FEATHER MARK", ],
|
|
[ "TIBETAN MARK GUG RTAGS GYON",
|
|
"TIBETAN MARK GUG RTAGS GYAS", ],
|
|
[ "TIBETAN MARK ANG KHANG GYON",
|
|
"TIBETAN MARK ANG KHANG GYAS", ],
|
|
;
|
|
|
|
#
|
|
# Filter out unknown characters; this may run on an older version
|
|
# of Perl with an old version of Unicode.
|
|
#
|
|
@bracket_pairs = grep {defined charnames::string_vianame ($$_ [0]) &&
|
|
defined charnames::string_vianame ($$_ [1])}
|
|
@bracket_pairs;
|
|
|
|
if (@bracket_pairs) {
|
|
my $delims = join "" => map {charnames::string_vianame ($$_ [0])}
|
|
@bracket_pairs;
|
|
my $cdelims = join "" => map {charnames::string_vianame ($$_ [1])}
|
|
@bracket_pairs;
|
|
|
|
pattern name => [qw (bquoted -esc=\\)],
|
|
create => sub {my $flags = $_ [1];
|
|
return gen_delimited ($delims, $flags -> {-esc},
|
|
$cdelims);
|
|
},
|
|
version => 5.014,
|
|
;
|
|
}
|
|
}
|
|
|
|
|
|
#
|
|
# Return the Unicode names of the pairs of matching delimiters.
|
|
#
|
|
sub bracket_pairs {@bracket_pairs}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
Regexp::Common::delimited -- provides a regex for delimited strings
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Regexp::Common qw /delimited/;
|
|
|
|
while (<>) {
|
|
/$RE{delimited}{-delim=>'"'}/ and print 'a \" delimited string';
|
|
/$RE{delimited}{-delim=>'/'}/ and print 'a \/ delimited string';
|
|
}
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Please consult the manual of L<Regexp::Common> for a general description
|
|
of the works of this interface.
|
|
|
|
Do not use this module directly, but load it via I<Regexp::Common>.
|
|
|
|
=head2 C<$RE{delimited}{-delim}{-cdelim}{-esc}>
|
|
|
|
Returns a pattern that matches a single-character-delimited substring,
|
|
with optional internal escaping of the delimiter.
|
|
|
|
When C<-delim => I<S>> is specified, each character in the sequence I<S> is
|
|
a possible delimiter. There is no default delimiter, so this flag must always
|
|
be specified.
|
|
|
|
By default, the closing delimiter is the same character as the opening
|
|
delimiter. If this is not wanted, for instance, if you want to match
|
|
a string with symmetric delimiters, you can specify the closing delimiter(s)
|
|
with C<-cdelim => I<S>>. Each character in I<S> is matched with the
|
|
corresponding character supplied with the C<-delim> option. If the C<-cdelim>
|
|
option has fewer characters than the C<-delim> option, the last character
|
|
is repeated as often as necessary. If the C<-cdelim> option has more
|
|
characters than the C<-delim> option, the extra characters are ignored.
|
|
|
|
If C<-esc => I<S>> is specified, each character in the sequence I<S> is
|
|
the escape character for the corresponding character in the C<-delim=I<S>> list.
|
|
The default escape is backslash.
|
|
|
|
For example:
|
|
|
|
$RE{delimited}{-delim=>'"'} # match "a \" delimited string"
|
|
$RE{delimited}{-delim=>'"'}{-esc=>'"'} # match "a "" delimited string"
|
|
$RE{delimited}{-delim=>'/'} # match /a \/ delimited string/
|
|
$RE{delimited}{-delim=>q{'"}} # match "string" or 'string'
|
|
$RE{delimited}{-delim=>"("}{-cdelim=>")"} # match (string)
|
|
|
|
Under C<-keep> (See L<Regexp::Common>):
|
|
|
|
=over 4
|
|
|
|
=item $1
|
|
|
|
captures the entire match
|
|
|
|
=item $2
|
|
|
|
captures the opening delimiter
|
|
|
|
=item $3
|
|
|
|
captures delimited portion of the string
|
|
|
|
=item $4
|
|
|
|
captures the closing delimiter
|
|
|
|
=back
|
|
|
|
=head2 $RE{quoted}{-esc}
|
|
|
|
A synonym for C<< $RE {delimited} {-delim => q {'"`}} {...} >>.
|
|
|
|
=head2 $RE {bquoted} {-esc}
|
|
|
|
This is a pattern which matches delimited strings, where the delimiters
|
|
are a set of matching brackets. Currently, this comprises 85 pairs. This
|
|
includes the 60 pairs of bidirectional paired brackets, as listed
|
|
in L<< http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt >>.
|
|
|
|
The other 25 pairs are the quotation marks, the double quotation
|
|
marks, the single and double pointing quotation marks, the heavy
|
|
single and double commas, 4 pairs of top-bottom parenthesis and
|
|
brackets, 9 pairs of presentation form for vertical brackets,
|
|
and the low paraphrase, raised omission, substitution, double
|
|
substitution, and transposition brackets.
|
|
|
|
In a future update, pairs may be added (or deleted).
|
|
|
|
This pattern requires perl 5.14.0 or higher.
|
|
|
|
For a full list of bracket pairs, inspect the output of
|
|
C<< Regexp::Common::delimited::bracket_pairs () >>, which returns
|
|
a list of two-element arrays, each holding the Unicode names of a
|
|
matching pair of delimiters.
|
|
|
|
The C<< {-esc => I<S> } >> works as in the C<< $RE {delimited} >> pattern.
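
For instance (a minimal sketch, assuming perl 5.14 or later; the sample
string is invented):

    use Regexp::Common qw /delimited/;

    my $text = "see \x{AB}note\x{BB} here";    # guillemet-quoted word
    if ($text =~ /$RE{bquoted}{-keep}/) {
        print "quoted: $1\n";    # the quoted part, delimiters included
    }
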
|
|
|
|
If C<< {-keep} >> is given, the following things will be captured:
|
|
|
|
=over 4
|
|
|
|
=item $1
|
|
|
|
captures the entire match
|
|
|
|
=item $2
|
|
|
|
captures the opening delimiter
|
|
|
|
=item $3
|
|
|
|
captures delimited portion of the string
|
|
|
|
=item $4
|
|
|
|
captures the closing delimiter
|
|
|
|
=back
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<Regexp::Common> for a general description of how to use this interface.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Damian Conway (damian@conway.org)
|
|
|
|
=head1 MAINTENANCE
|
|
|
|
This package is maintained by Abigail S<(I<regexp-common@abigail.be>)>.
|
|
|
|
=head1 BUGS AND IRRITATIONS
|
|
|
|
Bound to be plenty.
|
|
|
|
For a start, there are many common regexes missing.
|
|
Send them in to I<regexp-common@abigail.be>.
|
|
|
|
=head1 LICENSE and COPYRIGHT
|
|
|
|
This software is Copyright (c) 2001 - 2017, Damian Conway and Abigail.
|
|
|
|
This module is free software, and may be used under any of the following
|
|
licenses:
|
|
|
|
1) The Perl Artistic License. See the file COPYRIGHT.AL.
|
|
2) The Perl Artistic License 2.0. See the file COPYRIGHT.AL2.
|
|
3) The BSD License. See the file COPYRIGHT.BSD.
|
|
4) The MIT License. See the file COPYRIGHT.MIT.
|
|
|
|
=cut
|
|
EOD
|
|
# 2}}}
|
|
my $problems = 0;
|
|
$HAVE_Rexexp_Common = 0;
|
|
my $dir = "";
|
|
if ($opt_sdir) {
|
|
++$TEMP_OFF;
|
|
$dir = "$opt_sdir/$TEMP_OFF";
|
|
File::Path::rmtree($dir) if is_dir($dir);
|
|
File::Path::mkpath($dir) unless is_dir($dir);
|
|
} else {
|
|
# let File::Temp create a suitable temporary directory
|
|
$dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit
|
|
$TEMP_INST{ $dir } = "Regexp::Common";
|
|
}
|
|
print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v;
|
|
my $Regexp_dir = "$dir/Regexp";
|
|
my $Regexp_Common_dir = "$dir/Regexp/Common";
|
|
mkdir $Regexp_dir ;
|
|
mkdir $Regexp_Common_dir;
|
|
|
|
foreach my $module_file (keys %Regexp_Common_Contents) {
|
|
my $OUT = open_file('>', "$dir/Regexp/${module_file}.pm", 1);
|
|
if (defined $OUT) {
|
|
print $OUT $Regexp_Common_Contents{$module_file};
|
|
$OUT->close;
|
|
} else {
|
|
warn "Failed to install Regexp::${module_file}.pm\n";
|
|
$problems = 1;
|
|
}
|
|
}
|
|
|
|
push @INC, $dir;
|
|
eval "use Regexp::Common qw /comment RE_comment_HTML balanced/";
|
|
$HAVE_Rexexp_Common = 1 unless $problems;
|
|
} # 1}}}
|
|
sub Install_Algorithm_Diff { # {{{1
|
|
# Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a
|
|
# temporary directory for the duration of this run.
|
|
|
|
my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2
|
|
package Algorithm::Diff;
|
|
# Skip to first "=head" line for documentation.
|
|
use strict;
|
|
|
|
use integer; # see below in _replaceNextLargerWith() for mod to make
|
|
# if you don't use this
|
|
use vars qw( $VERSION @EXPORT_OK );
|
|
$VERSION = 1.19_02;
|
|
# ^ ^^ ^^-- Incremented at will
|
|
# | \+----- Incremented for non-trivial changes to features
|
|
# \-------- Incremented for fundamental changes
|
|
require Exporter;
|
|
*import = \&Exporter::import;
|
|
@EXPORT_OK = qw(
|
|
prepare LCS LCSidx LCS_length
|
|
diff sdiff compact_diff
|
|
traverse_sequences traverse_balanced
|
|
);
|
|
|
|
# McIlroy-Hunt diff algorithm
|
|
# Adapted from the Smalltalk code of Mario I. Wolczko, <mario@wolczko.com>
|
|
# by Ned Konz, perl@bike-nomad.com
|
|
# Updates by Tye McQueen, http://perlmonks.org/?node=tye
|
|
|
|
# Create a hash that maps each element of $aCollection to the set of
|
|
# positions it occupies in $aCollection, restricted to the elements
|
|
# within the range of indexes specified by $start and $end.
|
|
# The fourth parameter is a subroutine reference that will be called to
|
|
# generate a string to use as a key.
|
|
# Additional parameters, if any, will be passed to this subroutine.
|
|
#
|
|
# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen );
|
|
|
|
sub _withPositionsOfInInterval
|
|
{
|
|
my $aCollection = shift; # array ref
|
|
my $start = shift;
|
|
my $end = shift;
|
|
my $keyGen = shift;
|
|
my %d;
|
|
my $index;
|
|
for ( $index = $start ; $index <= $end ; $index++ )
|
|
{
|
|
my $element = $aCollection->[$index];
|
|
my $key = &$keyGen( $element, @_ );
|
|
if ( exists( $d{$key} ) )
|
|
{
|
|
unshift ( @{ $d{$key} }, $index );
|
|
}
|
|
else
|
|
{
|
|
$d{$key} = [$index];
|
|
}
|
|
}
|
|
return wantarray ? %d : \%d;
|
|
}
|
|
|
|
# Find the place at which aValue would normally be inserted into the
|
|
# array. If that place is already occupied by aValue, do nothing, and
|
|
# return undef. If the place does not exist (i.e., it is off the end of
|
|
# the array), add it to the end, otherwise replace the element at that
|
|
# point with aValue. It is assumed that the array's values are numeric.
|
|
# This is where the bulk (75%) of the time is spent in this module, so
|
|
# try to make it fast!
|
|
|
|
sub _replaceNextLargerWith
|
|
{
|
|
my ( $array, $aValue, $high ) = @_;
|
|
$high ||= $#$array;
|
|
|
|
# off the end?
|
|
if ( $high == -1 || $aValue > $array->[-1] )
|
|
{
|
|
push ( @$array, $aValue );
|
|
return $high + 1;
|
|
}
|
|
|
|
# binary search for insertion point...
|
|
my $low = 0;
|
|
my $index;
|
|
my $found;
|
|
while ( $low <= $high )
|
|
{
|
|
$index = ( $high + $low ) / 2;
|
|
|
|
# $index = int(( $high + $low ) / 2); # without 'use integer'
|
|
$found = $array->[$index];
|
|
|
|
if ( $aValue == $found )
|
|
{
|
|
return undef;
|
|
}
|
|
elsif ( $aValue > $found )
|
|
{
|
|
$low = $index + 1;
|
|
}
|
|
else
|
|
{
|
|
$high = $index - 1;
|
|
}
|
|
}
|
|
|
|
# now insertion point is in $low.
|
|
$array->[$low] = $aValue; # overwrite next larger
|
|
return $low;
|
|
}
|
|
|
|
# This method computes the longest common subsequence in $a and $b.
|
|
|
|
# Result is array or ref, whose contents is such that
|
|
# $a->[ $i ] == $b->[ $result[ $i ] ]
|
|
# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined.
|
|
|
|
# An additional argument may be passed; this is a hash or key generating
|
|
# function that should return a string that uniquely identifies the given
|
|
# element. It should be the case that if the key is the same, the elements
|
|
# will compare the same. If this parameter is undef or missing, the key
|
|
# will be the element as a string.
|
|
|
|
# By default, comparisons will use "eq" and elements will be turned into keys
|
|
# using the default stringizing operator '""'.
|
|
|
|
# Additional parameters, if any, will be passed to the key generation
|
|
# routine.
|
|
|
|
sub _longestCommonSubsequence
|
|
{
|
|
my $a = shift; # array ref or hash ref
|
|
my $b = shift; # array ref or hash ref
|
|
my $counting = shift; # scalar
|
|
my $keyGen = shift; # code ref
|
|
my $compare; # code ref
|
|
|
|
if ( ref($a) eq 'HASH' )
|
|
{ # prepared hash must be in $b
|
|
my $tmp = $b;
|
|
$b = $a;
|
|
$a = $tmp;
|
|
}
|
|
|
|
# Check for bogus (non-ref) argument values
|
|
if ( !ref($a) || !ref($b) )
|
|
{
|
|
my @callerInfo = caller(1);
|
|
die 'error: must pass array or hash references to ' . $callerInfo[3];
|
|
}
|
|
|
|
# set up code refs
|
|
# Note that these are optimized.
|
|
if ( !defined($keyGen) ) # optimize for strings
|
|
{
|
|
$keyGen = sub { $_[0] };
|
|
$compare = sub { my ( $a, $b ) = @_; $a eq $b };
|
|
}
|
|
else
|
|
{
|
|
$compare = sub {
|
|
my $a = shift;
|
|
my $b = shift;
|
|
&$keyGen( $a, @_ ) eq &$keyGen( $b, @_ );
|
|
};
|
|
}
|
|
|
|
my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] );
|
|
my ( $prunedCount, $bMatches ) = ( 0, {} );
|
|
|
|
if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us?
|
|
{
|
|
$bMatches = $b;
|
|
}
|
|
else
|
|
{
|
|
my ( $bStart, $bFinish ) = ( 0, $#$b );
|
|
|
|
# First we prune off any common elements at the beginning
|
|
while ( $aStart <= $aFinish
|
|
and $bStart <= $bFinish
|
|
and &$compare( $a->[$aStart], $b->[$bStart], @_ ) )
|
|
{
|
|
$matchVector->[ $aStart++ ] = $bStart++;
|
|
$prunedCount++;
|
|
}
|
|
|
|
# now the end
|
|
while ( $aStart <= $aFinish
|
|
and $bStart <= $bFinish
|
|
and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) )
|
|
{
|
|
$matchVector->[ $aFinish-- ] = $bFinish--;
|
|
$prunedCount++;
|
|
}
|
|
|
|
# Now compute the equivalence classes of positions of elements
|
|
$bMatches =
|
|
_withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ );
|
|
}
|
|
my $thresh = [];
|
|
my $links = [];
|
|
|
|
my ( $i, $ai, $j, $k );
|
|
for ( $i = $aStart ; $i <= $aFinish ; $i++ )
|
|
{
|
|
$ai = &$keyGen( $a->[$i], @_ );
|
|
if ( exists( $bMatches->{$ai} ) )
|
|
{
|
|
$k = 0;
|
|
for $j ( @{ $bMatches->{$ai} } )
|
|
{
|
|
|
|
# optimization: most of the time this will be true
|
|
if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j )
|
|
{
|
|
$thresh->[$k] = $j;
|
|
}
|
|
else
|
|
{
|
|
$k = _replaceNextLargerWith( $thresh, $j, $k );
|
|
}
|
|
|
|
# oddly, it's faster to always test this (CPU cache?).
|
|
if ( defined($k) )
|
|
{
|
|
$links->[$k] =
|
|
[ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (@$thresh)
|
|
{
|
|
return $prunedCount + @$thresh if $counting;
|
|
for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] )
|
|
{
|
|
$matchVector->[ $link->[1] ] = $link->[2];
|
|
}
|
|
}
|
|
elsif ($counting)
|
|
{
|
|
return $prunedCount;
|
|
}
|
|
|
|
return wantarray ? @$matchVector : $matchVector;
|
|
}
|
|
|
|
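# Walk the two sequences in parallel, calling the MATCH, DISCARD_A and
# DISCARD_B callbacks as elements are matched or skipped.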
sub traverse_sequences
|
|
{
|
|
my $a = shift; # array ref
|
|
my $b = shift; # array ref
|
|
my $callbacks = shift || {};
|
|
my $keyGen = shift;
|
|
my $matchCallback = $callbacks->{'MATCH'} || sub { };
|
|
my $discardACallback = $callbacks->{'DISCARD_A'} || sub { };
|
|
my $finishedACallback = $callbacks->{'A_FINISHED'};
|
|
my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { };
|
|
my $finishedBCallback = $callbacks->{'B_FINISHED'};
|
|
my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );
|
|
|
|
# Process all the lines in @$matchVector
|
|
my $lastA = $#$a;
|
|
my $lastB = $#$b;
|
|
my $bi = 0;
|
|
my $ai;
|
|
|
|
for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ )
|
|
{
|
|
my $bLine = $matchVector->[$ai];
|
|
if ( defined($bLine) ) # matched
|
|
{
|
|
&$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine;
|
|
&$matchCallback( $ai, $bi++, @_ );
|
|
}
|
|
else
|
|
{
|
|
&$discardACallback( $ai, $bi, @_ );
|
|
}
|
|
}
|
|
|
|
# The last entry (if any) processed was a match.
|
|
# $ai and $bi point just past the last matching lines in their sequences.
|
|
|
|
while ( $ai <= $lastA or $bi <= $lastB )
|
|
{
|
|
|
|
# last A?
|
|
if ( $ai == $lastA + 1 and $bi <= $lastB )
|
|
{
|
|
if ( defined($finishedACallback) )
|
|
{
|
|
&$finishedACallback( $lastA, @_ );
|
|
$finishedACallback = undef;
|
|
}
|
|
else
|
|
{
|
|
&$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB;
|
|
}
|
|
}
|
|
|
|
# last B?
|
|
if ( $bi == $lastB + 1 and $ai <= $lastA )
|
|
{
|
|
if ( defined($finishedBCallback) )
|
|
{
|
|
&$finishedBCallback( $lastB, @_ );
|
|
$finishedBCallback = undef;
|
|
}
|
|
else
|
|
{
|
|
&$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA;
|
|
}
|
|
}
|
|
|
|
&$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA;
|
|
&$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB;
|
|
}
|
|
|
|
return 1;
|
|
}
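# Illustrative usage sketch (comment only; @old and @new are hypothetical
# arrays of lines).  Each callback receives the current index pair ($ai, $bi):
#
#     my @script;
#     Algorithm::Diff::traverse_sequences( \@old, \@new, {
#         MATCH     => sub { push @script, [ '=', $old[ $_[0] ] ] },
#         DISCARD_A => sub { push @script, [ '-', $old[ $_[0] ] ] },
#         DISCARD_B => sub { push @script, [ '+', $new[ $_[1] ] ] },
#     } );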

sub traverse_balanced
{
    my $a                = shift;    # array ref
    my $b                = shift;    # array ref
    my $callbacks        = shift || {};
    my $keyGen           = shift;
    my $matchCallback    = $callbacks->{'MATCH'} || sub { };
    my $discardACallback = $callbacks->{'DISCARD_A'} || sub { };
    my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { };
    my $changeCallback   = $callbacks->{'CHANGE'};
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ );

    # Process all the lines in match vector
    my $lastA = $#$a;
    my $lastB = $#$b;
    my $bi    = 0;
    my $ai    = 0;
    my $ma    = -1;
    my $mb;

    while (1)
    {

        # Find next match indices $ma and $mb
        do {
            $ma++;
        } while (
                $ma <= $#$matchVector
            &&  !defined $matchVector->[$ma]
        );

        last if $ma > $#$matchVector;    # end of matchVector?
        $mb = $matchVector->[$ma];

        # Proceed with discard a/b or change events until
        # next match
        while ( $ai < $ma || $bi < $mb )
        {

            if ( $ai < $ma && $bi < $mb )
            {

                # Change
                if ( defined $changeCallback )
                {
                    &$changeCallback( $ai++, $bi++, @_ );
                }
                else
                {
                    &$discardACallback( $ai++, $bi, @_ );
                    &$discardBCallback( $ai, $bi++, @_ );
                }
            }
            elsif ( $ai < $ma )
            {
                &$discardACallback( $ai++, $bi, @_ );
            }
            else
            {

                # $bi < $mb
                &$discardBCallback( $ai, $bi++, @_ );
            }
        }

        # Match
        &$matchCallback( $ai++, $bi++, @_ );
    }

    while ( $ai <= $lastA || $bi <= $lastB )
    {
        if ( $ai <= $lastA && $bi <= $lastB )
        {

            # Change
            if ( defined $changeCallback )
            {
                &$changeCallback( $ai++, $bi++, @_ );
            }
            else
            {
                &$discardACallback( $ai++, $bi, @_ );
                &$discardBCallback( $ai, $bi++, @_ );
            }
        }
        elsif ( $ai <= $lastA )
        {
            &$discardACallback( $ai++, $bi, @_ );
        }
        else
        {

            # $bi <= $lastB
            &$discardBCallback( $ai, $bi++, @_ );
        }
    }

    return 1;
}
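# Illustrative usage sketch (comment only; @old and @new are hypothetical).
# Unlike traverse_sequences(), paired deletions and insertions can be
# reported through a single CHANGE callback:
#
#     Algorithm::Diff::traverse_balanced( \@old, \@new, {
#         MATCH     => sub { },
#         DISCARD_A => sub { print "- $old[ $_[0] ]\n" },
#         DISCARD_B => sub { print "+ $new[ $_[1] ]\n" },
#         CHANGE    => sub { print "! $old[ $_[0] ] => $new[ $_[1] ]\n" },
#     } );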

sub prepare
{
    my $a      = shift;    # array ref
    my $keyGen = shift;    # code ref

    # set up code ref
    $keyGen = sub { $_[0] } unless defined($keyGen);

    return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ );
}

sub LCS
{
    my $a = shift;    # array ref
    my $b = shift;    # array ref or hash ref
    my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ );
    my @retval;
    my $i;
    for ( $i = 0 ; $i <= $#$matchVector ; $i++ )
    {
        if ( defined( $matchVector->[$i] ) )
        {
            push ( @retval, $a->[$i] );
        }
    }
    return wantarray ? @retval : \@retval;
}

sub LCS_length
{
    my $a = shift;    # array ref
    my $b = shift;    # array ref or hash ref
    return _longestCommonSubsequence( $a, $b, 1, @_ );
}

sub LCSidx
{
    my $a= shift @_;
    my $b= shift @_;
    my $match= _longestCommonSubsequence( $a, $b, 0, @_ );
    my @am= grep defined $match->[$_], 0..$#$match;
    my @bm= @{$match}[@am];
    return \@am, \@bm;
}

sub compact_diff
{
    my $a= shift @_;
    my $b= shift @_;
    my( $am, $bm )= LCSidx( $a, $b, @_ );
    my @cdiff;
    my( $ai, $bi )= ( 0, 0 );
    push @cdiff, $ai, $bi;
    while( 1 ) {
        while( @$am  &&  $ai == $am->[0]  &&  $bi == $bm->[0] ) {
            shift @$am;
            shift @$bm;
            ++$ai, ++$bi;
        }
        push @cdiff, $ai, $bi;
        last   if  ! @$am;
        $ai = $am->[0];
        $bi = $bm->[0];
        push @cdiff, $ai, $bi;
    }
    push @cdiff, 0+@$a, 0+@$b
        if  $ai < @$a  ||  $bi < @$b;
    return wantarray ? @cdiff : \@cdiff;
}
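# Illustrative note (comment only): compact_diff() returns a flat list of
# index pairs that alternately bound "same" and "different" runs, starting
# with a (possibly empty) "same" run.  For example,
#     @a = qw(a b c);  @b = qw(a x c);
# gives ( 0,0, 1,1, 2,2, 3,3 ):  a[0..0]/b[0..0] are the same, a[1..1] and
# b[1..1] differ, a[2..2]/b[2..2] are the same.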

sub diff
{
    my $a      = shift;    # array ref
    my $b      = shift;    # array ref
    my $retval = [];
    my $hunk   = [];
    my $discard = sub {
        push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ];
    };
    my $add = sub {
        push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ];
    };
    my $match = sub {
        push @$retval, $hunk
            if 0 < @$hunk;
        $hunk = []
    };
    traverse_sequences( $a, $b,
        { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ );
    &$match();
    return wantarray ? @$retval : $retval;
}
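# Illustrative note (comment only): diff() returns a list of hunks, each a
# list of [ '+' or '-', index, element ] triples.  For
#     @a = qw(a b c);  @b = qw(a x c);
# the result is a single hunk:  [ [ '-', 1, 'b' ], [ '+', 1, 'x' ] ].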

sub sdiff
{
    my $a      = shift;    # array ref
    my $b      = shift;    # array ref
    my $retval = [];
    my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) };
    my $add     = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) };
    my $change  = sub {
        push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] );
    };
    my $match = sub {
        push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] );
    };
    traverse_balanced(
        $a,
        $b,
        {
            MATCH     => $match,
            DISCARD_A => $discard,
            DISCARD_B => $add,
            CHANGE    => $change,
        },
        @_
    );
    return wantarray ? @$retval : $retval;
}
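# Illustrative note (comment only): sdiff() (the routine cloc imports)
# returns one [ flag, a_element, b_element ] triple per output row, where
# the flag is 'u' (unchanged), 'c' (changed), '+' (added; a_element is "")
# or '-' (removed; b_element is "").  For
#     @a = qw(a b c);  @b = qw(a x c);
# it returns ( ['u','a','a'], ['c','b','x'], ['u','c','c'] ).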

########################################
my $Root= __PACKAGE__;
package Algorithm::Diff::_impl;
use strict;

sub _Idx()  { 0 } # $me->[_Idx]: Ref to array of hunk indices
            # 1 # $me->[1]: Ref to first sequence
            # 2 # $me->[2]: Ref to second sequence
sub _End()  { 3 } # $me->[_End]: Diff between forward and reverse pos
sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items
sub _Base() { 5 } # $me->[_Base]: Added to range's min and max
sub _Pos()  { 6 } # $me->[_Pos]: Which hunk is currently selected
sub _Off()  { 7 } # $me->[_Off]: Offset into _Idx for current position
sub _Min() { -2 } # Added to _Off to get min instead of max+1

sub Die
{
    require Carp;
    Carp::confess( @_ );
}

sub _ChkPos
{
    my( $me )= @_;
    return if $me->[_Pos];
    my $meth= ( caller(1) )[3];
    Die( "Called $meth on 'reset' object" );
}

sub _ChkSeq
{
    my( $me, $seq )= @_;
    return $seq + $me->[_Off]
        if  1 == $seq  ||  2 == $seq;
    my $meth= ( caller(1) )[3];
    Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" );
}

sub getObjPkg
{
    my( $us )= @_;
    return ref $us if ref $us;
    return $us . "::_obj";
}

sub new
{
    my( $us, $seq1, $seq2, $opts ) = @_;
    my @args;
    for( $opts->{keyGen} ) {
        push @args, $_ if $_;
    }
    for( $opts->{keyGenArgs} ) {
        push @args, @$_ if $_;
    }
    my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args );
    my $same= 1;
    if( 0 == $cdif->[2]  &&  0 == $cdif->[3] ) {
        $same= 0;
        splice @$cdif, 0, 2;
    }
    my @obj= ( $cdif, $seq1, $seq2 );
    $obj[_End] = (1+@$cdif)/2;
    $obj[_Same] = $same;
    $obj[_Base] = 0;
    my $me = bless \@obj, $us->getObjPkg();
    $me->Reset( 0 );
    return $me;
}

sub Reset
{
    my( $me, $pos )= @_;
    $pos= int( $pos || 0 );
    $pos += $me->[_End]
        if  $pos < 0;
    $pos= 0
        if  $pos < 0  ||  $me->[_End] <= $pos;
    $me->[_Pos]= $pos || !1;
    $me->[_Off]= 2*$pos - 1;
    return $me;
}

sub Base
{
    my( $me, $base )= @_;
    my $oldBase= $me->[_Base];
    $me->[_Base]= 0+$base if defined $base;
    return $oldBase;
}

sub Copy
{
    my( $me, $pos, $base )= @_;
    my @obj= @$me;
    my $you= bless \@obj, ref($me);
    $you->Reset( $pos ) if defined $pos;
    $you->Base( $base );
    return $you;
}

sub Next {
    my( $me, $steps )= @_;
    $steps= 1 if ! defined $steps;
    if( $steps ) {
        my $pos= $me->[_Pos];
        my $new= $pos + $steps;
        $new= 0 if $pos && $new < 0;
        $me->Reset( $new )
    }
    return $me->[_Pos];
}

sub Prev {
    my( $me, $steps )= @_;
    $steps= 1 if ! defined $steps;
    my $pos= $me->Next(-$steps);
    $pos -= $me->[_End] if $pos;
    return $pos;
}

sub Diff {
    my( $me )= @_;
    $me->_ChkPos();
    return 0 if $me->[_Same] == ( 1 & $me->[_Pos] );
    my $ret= 0;
    my $off= $me->[_Off];
    for my $seq ( 1, 2 ) {
        $ret |= $seq
            if  $me->[_Idx][ $off + $seq + _Min ]
            <   $me->[_Idx][ $off + $seq ];
    }
    return $ret;
}

sub Min {
    my( $me, $seq, $base )= @_;
    $me->_ChkPos();
    my $off= $me->_ChkSeq($seq);
    $base= $me->[_Base] if !defined $base;
    return $base + $me->[_Idx][ $off + _Min ];
}

sub Max {
    my( $me, $seq, $base )= @_;
    $me->_ChkPos();
    my $off= $me->_ChkSeq($seq);
    $base= $me->[_Base] if !defined $base;
    return $base + $me->[_Idx][ $off ] -1;
}

sub Range {
    my( $me, $seq, $base )= @_;
    $me->_ChkPos();
    my $off = $me->_ChkSeq($seq);
    if( !wantarray ) {
        return  $me->[_Idx][ $off ]
            -   $me->[_Idx][ $off + _Min ];
    }
    $base= $me->[_Base] if !defined $base;
    return  ( $base + $me->[_Idx][ $off + _Min ] )
        ..  ( $base + $me->[_Idx][ $off ] - 1 );
}

sub Items {
    my( $me, $seq )= @_;
    $me->_ChkPos();
    my $off = $me->_ChkSeq($seq);
    if( !wantarray ) {
        return  $me->[_Idx][ $off ]
            -   $me->[_Idx][ $off + _Min ];
    }
    return
        @{$me->[$seq]}[
                $me->[_Idx][ $off + _Min ]
            ..  ( $me->[_Idx][ $off ] - 1 )
        ];
}

sub Same {
    my( $me )= @_;
    $me->_ChkPos();
    return wantarray ? () : 0
        if  $me->[_Same] != ( 1 & $me->[_Pos] );
    return $me->Items(1);
}

my %getName;
BEGIN {
    %getName= (
        same  => \&Same,
        diff  => \&Diff,
        base  => \&Base,
        min   => \&Min,
        max   => \&Max,
        range => \&Range,
        items => \&Items,    # same thing
    );
}

sub Get
{
    my $me= shift @_;
    $me->_ChkPos();
    my @value;
    for my $arg ( @_ ) {
        for my $word ( split ' ', $arg ) {
            my $meth;
            if(     $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/
                ||  not  $meth= $getName{ lc $2 }
            ) {
                Die( $Root, ", Get: Invalid request ($word)" );
            }
            my( $base, $name, $seq )= ( $1, $2, $3 );
            push @value, scalar(
                4 == length($name)
                    ? $meth->( $me )
                    : $meth->( $me, $seq, $base )
            );
        }
    }
    if( wantarray ) {
        return @value;
    } elsif( 1 == @value ) {
        return $value[0];
    }
    Die( 0+@value, " values requested from ",
        $Root, "'s Get in scalar context" );
}


my $Obj= getObjPkg($Root);
no strict 'refs';

for my $meth ( qw( new getObjPkg ) ) {
    *{$Root."::".$meth} = \&{$meth};
    *{$Obj ."::".$meth} = \&{$meth};
}
for my $meth ( qw(
    Next Prev Reset Copy Base Diff
    Same Items Range Min Max Get
    _ChkPos _ChkSeq
) ) {
    *{$Obj."::".$meth} = \&{$meth};
}

1;
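# Illustrative OO usage sketch (comment only; @old and @new are hypothetical
# arrays).  Walk the hunks and print a simple +/- listing:
#
#     my $d = Algorithm::Diff->new( \@old, \@new );
#     while ( $d->Next() ) {
#         next if $d->Same();                  # skip unchanged hunks
#         print "- $_\n" for $d->Items(1);     # items only in @old
#         print "+ $_\n" for $d->Items(2);     # items only in @new
#     }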
# This version released by Tye McQueen (http://perlmonks.org/?node=tye).
#
# =head1 LICENSE
#
# Parts Copyright (c) 2000-2004 Ned Konz.  All rights reserved.
# Parts by Tye McQueen.
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl.
#
# =head1 MAILING LIST
#
# Mark-Jason still maintains a mailing list.  To join a low-volume mailing
# list for announcements related to diff and Algorithm::Diff, send an
# empty mail message to mjd-perl-diff-request@plover.com.
# =head1 CREDITS
#
# Versions through 0.59 (and much of this documentation) were written by:
#
# Mark-Jason Dominus, mjd-perl-diff@plover.com
#
# This version borrows some documentation and routine names from
# Mark-Jason's, but Diff.pm's code was completely replaced.
#
# This code was adapted from the Smalltalk code of Mario Wolczko
# <mario@wolczko.com>, which is available at
# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st
#
# C<sdiff> and C<traverse_balanced> were written by Mike Schilli
# <m@perlmeister.com>.
#
# The algorithm is that described in
# I<A Fast Algorithm for Computing Longest Common Subsequences>,
# CACM, vol.20, no.5, pp.350-353, May 1977, with a few
# minor improvements to improve the speed.
#
# Much work was done by Ned Konz (perl@bike-nomad.com).
#
# The OO interface and some other changes are by Tye McQueen.
#
EOAlgDiff
# 2}}}
    my $problems = 0;
    $HAVE_Algorithm_Diff = 0;
    my $dir = "";
    if ($opt_sdir) {
        ++$TEMP_OFF;
        $dir = "$opt_sdir/$TEMP_OFF";
        File::Path::rmtree($dir) if     is_dir($dir);
        File::Path::mkpath($dir) unless is_dir($dir);
    } else {
        # let File::Temp create a suitable temporary directory
        $dir = tempdir( CLEANUP => 1 );  # 1 = delete on exit
        $TEMP_INST{ $dir } = "Algorithm::Diff";
    }
    print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v;
    my $Algorithm_dir      = "$dir/Algorithm";
    my $Algorithm_Diff_dir = "$dir/Algorithm/Diff";
    mkdir $Algorithm_dir     ;
    mkdir $Algorithm_Diff_dir;

    my $OUT = open_file('>', "$dir/Algorithm/Diff.pm", 1);
    if (defined $OUT) {
        print $OUT $Algorithm_Diff_Contents;
        $OUT->close;
    } else {
        warn "Failed to install Algorithm/Diff.pm\n";
        $problems = 1;
    }

    push @INC, $dir;  # only needs to happen once; shared by this install
                      # and the Regexp::Common install
    eval "use Algorithm::Diff qw / sdiff /";
    $HAVE_Algorithm_Diff = 1 unless $problems;
} # 1}}}
__END__
mode values (stat $item)[2]
             Unix    Windows
    file:   33188      33206
    dir :   16832      16895
    link:   33261      33206
    pipe:    4544       null
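
A sketch of how these values decode (illustrative only; $item stands for
any path):

    use Fcntl ':mode';                  # S_ISREG, S_ISDIR, S_ISFIFO, ...
    my $mode = (stat $item)[2];
    print "file\n" if S_ISREG($mode);   # 33188 == 0100644, 33206 == 0100666
    print "dir\n"  if S_ISDIR($mode);   # 16832 == 0040700, 16895 == 0040777
    print "pipe\n" if S_ISFIFO($mode);  # 4544  == 0010700

Note that stat() follows symlinks, so the "link" value above (33261 ==
0100755) is the mode of the link's target; use lstat() and S_ISLNK() to
detect the link itself.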