#! /usr/bin/perl
#
# clientstats.pl
#
# This script will get statistical data on newsreader (client) usage
# from a database.
#
# It is part of the NewsStats package.
#
# Copyright (c) 2025 Thomas Hochstein <thh@thh.name>
#
# It can be redistributed and/or modified under the same terms under
# which Perl itself is published.

BEGIN {
  use File::Basename;
  # This script lives in .../bin while its module lives in the sibling
  # directory .../lib, so that directory is appended to the module search
  # path at compile time, before the 'use' statements below are processed.
  my $LibDir = dirname($0) . '/../lib';
  push @INC, $LibDir;
}
use strict;
use warnings;

use NewsStats qw(:DEFAULT :TimePeriods :Output :SQLHelper ReadGroupList);

use DBI;
use Getopt::Long qw(GetOptions);
Getopt::Long::config ('bundling');

################################# Main program #################################

### read commandline options
# one lexical per option; all of them stay undefined unless the option
# is given on the command line
my ($OptCaptions,$OptComments,$OptDB,$OptFileTemplate,$OptFormat);
my ($OptGroupBy,$LowBound,$OptMonth,$OptNames,$OptOrderBy);
my ($OptReportType,$OptSums,$UppBound,$OptVersions,$OptConfFile);
# --help and --version are handled by callbacks;
# leave with exit code 1 if the command line cannot be parsed
GetOptions (
  'c|captions!'    => \$OptCaptions,
  'comments!'      => \$OptComments,
  'db=s'           => \$OptDB,
  'filetemplate=s' => \$OptFileTemplate,
  'f|format=s'     => \$OptFormat,
  'g|group-by=s'   => \$OptGroupBy,
  'l|lower=i'      => \$LowBound,
  'm|month=s'      => \$OptMonth,
  'n|names=s'      => \$OptNames,
  'o|order-by=s'   => \$OptOrderBy,
  'r|report=s'     => \$OptReportType,
  's|sums!'        => \$OptSums,
  'u|upper=i'      => \$UppBound,
  'v|versions!'    => \$OptVersions,
  'conffile=s'     => \$OptConfFile,
  'h|help'         => \&ShowPOD,
  'V|version'      => \&ShowVersion,
) or exit 1;
# parse parameters
# comments are switched on by default, unless the user made an explicit
# choice (--comments / --nocomments) or --filetemplate is used
if (!defined($OptComments) && !$OptFileTemplate) {
  $OptComments = 1;
}
# normalize $OptReportType: a value matching 'sum' or 'sums' (in any case)
# selects the 'sum' report, any other non-empty value the 'default' report;
# an unset $OptReportType is left alone
$OptReportType = ($OptReportType =~ /sums?/i) ? 'sum' : 'default'
  if $OptReportType;

### read configuration
# ReadConfig() delivers a hash reference; work on a copy of that hash
my $ConfRef = ReadConfig($OptConfFile);
my %Conf    = %{$ConfRef};

### set DBTable
# use the table configured as 'DBTableClnts' unless --db names another one
$Conf{'DBTable'} = $OptDB ? $OptDB : $Conf{'DBTableClnts'};

### init database
my $DBHandle = InitDB(\%Conf,1);

### get time period and names, prepare SQL 'WHERE' clause
# translate --month into a caption for the output
# and an expression for the SQL 'WHERE' clause
my ($CaptionPeriod,$SQLWherePeriod) = &GetTimePeriod($OptMonth);
# no caption was delivered, so --month is invalid: bail out
if (!$CaptionPeriod) {
  &Bleat(2,"--month option has an invalid format - ".
           "please use 'YYYY-MM', 'YYYY-MM:YYYY-MM' or 'ALL'!");
}
# translate --names (if set) into an expression for the SQL 'WHERE' clause
# containing placeholders, and the list of names to bind to them
my ($SQLWhereNames,@SQLBindNames);
if ($OptNames) {
  ($SQLWhereNames,@SQLBindNames) = &SQLGroupList($OptNames,'client');
  # no expression was delivered, so --names is invalid: bail out
  &Bleat(2,"--names option has an invalid format!") unless $SQLWhereNames;
}

### build SQL WHERE clause
# rows of the client 'ALL' are only kept when --sums is set
my $ExcludeSums = '';
$ExcludeSums = "client != 'ALL'" if !$OptSums;
# conditions for --lower and --upper
my @SQLBounds = &SQLSetBounds('default',$LowBound,$UppBound);
# only rows with version 'ALL' are selected here
my $SQLWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
                                    $ExcludeSums,"version = 'ALL'",
                                    @SQLBounds);

### get sort order and build SQL 'ORDER BY' clause
# every report type other than 'default' is always grouped by 'month'
if ($OptReportType and $OptReportType ne 'default') {
  $OptGroupBy = 'month';
}
# without an explicit grouping, group by 'name' if more than one month
# (a period containing ':') but just one name (no list separator and
# no wildcard character) is requested
if (!$OptGroupBy) {
  my $SeveralMonths = ($OptMonth and $OptMonth =~ /:/);
  my $SingleName    = ($OptNames and $OptNames !~ /[:*%]/);
  $OptGroupBy = 'name' if ($SeveralMonths and $SingleName);
}
# parse $OptGroupBy to $GroupBy, create ORDER BY clause $SQLOrderClause
# if $OptGroupBy is still not set, SQLSortOrder() will default to 'month'
my ($GroupBy,$SQLOrderClause) = SQLSortOrder($OptGroupBy, $OptOrderBy, 'client, version');
# $GroupBy now contains 'month' or 'client, version';
# the output code expects 'month' or 'key' instead
$GroupBy = 'key' if $GroupBy ne 'month';

### get report type and build SQL 'SELECT' query
# default report: one row for each month and client, no grouping
my $SQLSelect      = "month,LEFT(client,40),postings";
my $SQLGroupClause = '';

if ($OptReportType and $OptReportType ne 'default') {
  # sum report: add up the postings of all months for each client and
  # deliver the fixed text 'All months' instead of a month
  $SQLSelect      = "'All months',LEFT(client,40),SUM(postings)";
  $SQLGroupClause = "GROUP BY client, version";
  # adapt $SQLOrderClause accordingly
  for ($SQLOrderClause) {
    # replace everything before 'postings'
    s/BY.+postings/BY postings/;
    # replace 'postings' with 'SUM(postings)'
    s/postings/SUM(postings)/;
  }
}

### get length of longest name delivered by query
### for formatting purposes
# the entries listed below each caption are measured: clients if the
# report is grouped by month, months if it is grouped by client
my $Field = ($GroupBy eq 'month') ? 'LEFT(client,40)' : 'month';
my ($MaxLength,$MaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
                                              $Field,'postings',$SQLWhereClause,
                                              '',@SQLBindNames);

### build and execute SQL query
# assemble the statement from the clauses built so far
my $SQLQuery = sprintf('SELECT %s FROM %s.%s %s %s %s',
                       $SQLSelect,
                       $Conf{'DBDatabase'},$Conf{'DBTable'},
                       $SQLWhereClause,$SQLGroupClause,
                       $SQLOrderClause);
# prepare query
# prepare() returns undef on failure; bail out with a proper message
# instead of calling execute() on an undefined statement handle
my $DBQuery = $DBHandle->prepare($SQLQuery)
  or &Bleat(2,sprintf("Can't prepare query for client data for %s from %s.%s: %s\n",
                      $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                      $DBI::errstr));
# execute query, binding the names from --names to their placeholders
$DBQuery->execute(@SQLBindNames)
  or &Bleat(2,sprintf("Can't get client data for %s from %s.%s: %s\n",
                      $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                      $DBI::errstr));

### output results
# output format defaults to 'pretty'
$OptFormat ||= 'pretty';
# build the captions if --captions is set (and comments are not suppressed);
# $LeadIn stays undefined otherwise
my $LeadIn;
if ($OptCaptions && $OptComments) {
  my @CaptionLines;
  # time period with report type
  my $IsSumCaption = ($OptReportType and $OptReportType ne 'default');
  my $CaptionReportType = $IsSumCaption
                        ? '(number of all postings for that time period)'
                        : '(number of postings for each month)';
  push(@CaptionLines,
       sprintf("# ----- Report for %s %s\n",$CaptionPeriod,$CaptionReportType));
  # name list, if --names is set (shown separated by commas)
  if ($OptNames) {
    my @Names = split(/:/,$OptNames);
    push(@CaptionLines,sprintf("# ----- Names: %s\n",join(',',@Names)));
  }
  # boundaries, if set; a boundary that is not set leaves its slots empty
  if ($LowBound or $UppBound) {
    my $CaptionBoundary= '(counting only months fulfilling this condition)';
    my ($Lower,$LowerSign) = $LowBound ? ($LowBound,'=>') : ('','');
    my ($UpperSign,$Upper) = $UppBound ? ('<=',$UppBound) : ('','');
    push(@CaptionLines,
         sprintf("# ----- Threshold: %s %s x %s %s %s\n",
                 $Lower,$LowerSign,$UpperSign,$Upper,$CaptionBoundary));
  }
  # primary and secondary sort order
  my $GroupedBy  = ($GroupBy eq 'month') ? 'Months' : 'Names';
  my $GroupOrder = ($OptGroupBy and $OptGroupBy =~ /-?desc$/i)
                 ? 'descending' : 'ascending';
  my $SortedBy   = ($OptOrderBy and $OptOrderBy =~ /posting/i)
                 ? 'by number of postings ' : '';
  my $SortOrder  = ($OptOrderBy and $OptOrderBy =~ /-?desc$/i)
                 ? 'descending' : 'ascending';
  push(@CaptionLines,
       sprintf("# ----- Grouped by %s (%s), sorted %s%s\n",
               $GroupedBy,$GroupOrder,$SortedBy,$SortOrder));
  $LeadIn = join('',@CaptionLines);
}

# output data
# (changed code copy from NewsStats::OutputData)
#
# Every row of the main query is printed through FormatOutput(), either to
# STDOUT or - if --filetemplate is set - to one file for each caption
# (month or client, depending on the grouping). If --versions is set, each
# client line is followed by the versions of that client.
my ($LastIteration, $FileName, $Handle, $OUT);

# define output types
my %LegalOutput;
@LegalOutput{('dump','list','pretty')} = ();
# bail out if format is unknown
&Bleat(2,"Unknown output type '$OptFormat'!") if !exists($LegalOutput{$OptFormat});

### prepare query for client versions
# The statement is the same for every client, so it is built and prepared
# only once; just the bound values change from row to row.
my $VersSumReport = ($OptReportType and $OptReportType ne 'default');
my ($VersWhereClause, $DBVersQuery);
if ($OptVersions) {
  # same conditions as for the clients, but without 'ALL' version and
  # limited to a single client
  my @VersFilter = ("version != 'ALL'",'client = ?');
  # The default report is additionally limited to the month of the current
  # row. Rows of a sum report carry the text 'All months' instead of a
  # month, which would never match, so no month condition is set there
  # and the postings of all selected months are summed up for each version.
  push(@VersFilter,'month = ?') if !$VersSumReport;
  $VersWhereClause = SQLBuildClause('where',$SQLWherePeriod,$SQLWhereNames,
                                    $ExcludeSums,@VersFilter,
                                    &SQLSetBounds('default',$LowBound,$UppBound));
  # prepare query; bail out if that fails
  $DBVersQuery = $DBHandle->prepare(sprintf('SELECT version,%s FROM %s.%s %s %s %s',
                                            $VersSumReport ? 'SUM(postings)' : 'postings',
                                            $Conf{'DBDatabase'},$Conf{'DBTable'},
                                            $VersWhereClause,$SQLGroupClause,
                                            $SQLOrderClause))
    or &Bleat(2,sprintf("Can't prepare query for version data for %s from %s.%s: %s\n",
                        $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                        $DBI::errstr));
}

while (my ($Month, $Key, $Value) = $DBQuery->fetchrow_array) {
  # save client for later use
  my $Client = $Key;
  # care for correct sorting order and abstract from month and keys:
  # $Caption will be $Month or $Key, according to sorting order,
  # and $Key will be $Key or $Month, respectively
  my $Caption = $Month;
  if ($GroupBy eq 'key') {
    $Caption = $Key;
    $Key     = $Month;
  }
  # set output file handle
  if (!$OptFileTemplate) {
    $Handle = *STDOUT{IO}; # set $Handle to a reference to STDOUT
  } elsif (!defined($LastIteration) or $LastIteration ne $Caption) {
    # new caption: close the file of the previous caption, if there is one
    # ($OUT is only defined after a file has been opened)
    if (defined($OUT)) {
      close($OUT)
        or warn sprintf("Cannot close output file '%s': %s\n",$FileName,$!);
    }
    # safeguards for filename creation:
    # replace potential problem characters with '_'
    $FileName = sprintf('%s-%s',$OptFileTemplate,$Caption);
    $FileName =~ s/[^a-zA-Z0-9_-]+/_/g;
    # three-argument open: the file name can never be taken for a mode
    open ($OUT,'>',$FileName)
      or &Bleat(2,sprintf("Cannot open output file '%s': %s",
                          $FileName,$!));
    $Handle = $OUT;
  }
  print {$Handle} &FormatOutput($OptFormat, $OptComments, $LeadIn, $Caption,
                                $Key, $Value, 0, $MaxLength, $MaxValLength, $LastIteration);
  # output client versions
  if ($OptVersions) {
    ### get client versions
    # bind the names from --names, the client and (except for sum
    # reports) the month of the current row
    my @SQLVersBindNames = (@SQLBindNames, $Client);
    push(@SQLVersBindNames, $Month) if !$VersSumReport;

    # get length of longest version delivered by query
    # for formatting purposes
    my ($VersMaxLength,$VersMaxValLength) = &GetMaxLength($DBHandle,$Conf{'DBTable'},
                                                          'version','postings',$VersWhereClause,
                                                          '',@SQLVersBindNames);
    if ($VersMaxLength) {
      # add length of '- '
      $VersMaxLength += 2;
      # set to length of longest client, if longer
      $VersMaxLength    = $MaxLength    if $MaxLength    > $VersMaxLength;
      $VersMaxValLength = $MaxValLength if $MaxValLength > $VersMaxValLength;
    }

    # execute query
    $DBVersQuery->execute(@SQLVersBindNames)
      or &Bleat(2,sprintf("Can't get version data for %s from %s.%s: %s\n",
                          $CaptionPeriod,$Conf{'DBDatabase'},$Conf{'DBTable'},
                          $DBI::errstr));
    # output versions, each marked with a leading '- '
    while (my ($Version, $Postings) = $DBVersQuery->fetchrow_array) {
      $Version = '- ' . $Version;
      print {$Handle} &FormatOutput($OptFormat, $OptComments, $LeadIn, '',
                                    $Version, $Postings, 0, $VersMaxLength, $VersMaxValLength,
                                    '');
    }
  }
  $LastIteration = $Caption;
}
# close the last output file, if any; $OUT is still undefined if the query
# did not return any rows, and close() on an undefined handle is fatal
if (defined($OUT)) {
  close($OUT)
    or warn sprintf("Cannot close output file '%s': %s\n",$FileName,$!);
}

### close handles
$DBHandle->disconnect;

__END__

################################ Documentation #################################

=head1 NAME

clientstats - create reports on client usage

=head1 SYNOPSIS

B<clientstats> [B<-Vhcsv> B<--comments>] [B<-m> I<YYYY-MM>[:I<YYYY-MM>] | I<all>] [B<-n> I<client(s)>] [B<-r> I<report type>] [B<-l> I<lower boundary>] [B<-u> I<upper boundary>] [B<-g> I<group by>] [B<-o> I<order by>] [B<-f> I<output format>] [B<--filetemplate> I<filename template>] [B<--db> I<database table>] [B<--conffile> I<filename>]

=head1 REQUIREMENTS

See L<doc/README>.

=head1 DESCRIPTION

This script creates reports on newsreader (client) usage (number of
postings using each client per month) taken from result tables created
by B<gatherstats.pl>.

=head2 Features and options

=head3 Time period and names

The time period to act on defaults to last month; you can assign another
time period or a single month (or drop all time constraints) via the
B<--month> option (see below).

B<clientstats> will process all clients by default; you can limit
processing to only some clients by supplying a list of those names by
using the B<--names> option (see below).

=head3 Report type

You can choose between different B<--report> types: postings per month
or all postings summed up; for details, see below.

=head3 Upper and lower boundaries

Furthermore you can set a lower and/or upper boundary to exclude some
results from output via the B<--lower> and B<--upper> options,
respectively. By default, all clients with less postings per month than
the lower boundary and/or more postings per month than the upper
boundary will be excluded from the result set (i.e. not shown and
not considered for sum reports).

=head3 Sorting and formatting the output

By default, all results are grouped by month; you can group results by
clients instead via the B<--group-by> option. Within those groups,
the list of clients (or months) is sorted alphabetically
(or chronologically, respectively) ascending. You can change that order
(and sort by number of postings) with the B<--order-by> option. For
details and exceptions, please see below.

The results will be formatted as a kind of table; you can change the
output format to a simple list or just a list of names and number of
postings with the B<--format> option. Captions will be added by means
of the B<--captions> option; all comments (and captions) can be
suppressed by using B<--nocomments>.

Last but not least you can redirect all output to a number of files,
e.g. one for each month, by submitting the B<--filetemplate> option,
see below.

=head2 Configuration

B<clientstats> will read its configuration from F<newsstats.conf>
which should be present in etc/ via Config::Auto or from a configuration
file submitted by the B<--conffile> option.

See doc/INSTALL for an overview of possible configuration options.

You can override some configuration options via the B<--db> option.

=head1 OPTIONS

=over 3

=item B<-V>, B<--version>

Display version and copyright information and exit.

=item B<-h>, B<--help>

Display this man page and exit.

=item B<-m>, B<--month> I<YYYY-MM[:YYYY-MM]|all>

Set processing period to a single month in YYYY-MM format or to a time
period between two months in YYYY-MM:YYYY-MM format (two months,
separated by a colon). By using the keyword I<all> instead, you can set
no processing period to process the whole database. Defaults to last
month.

=item B<-n>, B<--names> I<name(s)>

Limit processing to a certain set of client names. I<name(s)>
can be a single name (Thunderbird), a group of names (Ice*) or a list
of either of these, separated by colons, for example

   Forte Agent:Thunderbird:Ice*

Spaces or special characters like "*" need to be quoted from the shell,
like

   -n 'Forte Agent:Thunderbird:Ice*'

There is no way to limit processing to a specific version, but you can
always grep through the output.

=item B<-s>, B<--sums|--nosums> (sum per month)

Include "virtual" clients named "ALL" for every month in output,
containing the sum of all detected clients for that month. False
by default.

=item B<-v>, B<--versions|--noversions> (client versions)

Include a list of all observed versions of each client in output.
Version information will be displayed with indents ('-') below each
client, sorted in the same way (by postings or alphanumeric). False
by default.

=item B<-r>, B<--report> I<default|sums>

Choose the report type: I<default> or I<sums>

By default, B<clientstats> will report the number of postings for each
client in each month. But it can also report the total sum of postings
per client for all months. Sums of B<--versions> can be included.

For report type I<sums>, the B<group-by> option has no meaning and
will be silently ignored (see below).

=item B<-l>, B<--lower> I<lower boundary>

Set the lower boundary. See below.

=item B<-u>, B<--upper> I<upper boundary>

Set the upper boundary.

By default, all clients with more postings per month than the
upper boundary and/or less postings per month than the lower boundary
will be excluded from further processing. For the default report that
means each month only clients with a number of postings between the
boundaries will be displayed. For the sums report, clients with a
number of postings exceeding the boundaries in all (!) months will
not be considered.

=item B<-g>, B<--group-by> I<month[-desc]|name[-desc]>

By default, all results are grouped by month, sorted chronologically in
ascending order, like this:

    # ----- 2012-01:
    40tude_Dialog:  5873
    Forte Agent  :  7735
    Thunderbird  : 20925
    # ----- 2012-02:
    40tude_Dialog:  4142
    Forte Agent  :  5895
    Thunderbird  : 19091

The results can be grouped by client instead via
B<--group-by> I<name>:

    # ----- 40tude_Dialog:
    2012-01:  5873
    2012-02:  4142
    # ----- Forte Agent:
    2012-01:  7735
    2012-02:  5895
    # ----- Thunderbird:
    2012-01: 20925
    2012-02: 19091

By appending I<-desc> to the group-by option parameter, you can reverse
the sort order - e.g. B<--group-by> I<month-desc> will give:

    # ----- 2012-02:
    40tude_Dialog:  4142
    Forte Agent  :  5895
    Thunderbird  : 19091
    # ----- 2012-01:
    40tude_Dialog:  5873
    Forte Agent  :  7735
    Thunderbird  : 20925

Sums reports (see above) will always be grouped by months; this option
will therefore be ignored.

=item B<-o>, B<--order-by> I<default[-desc]|postings[-desc]>

Within each group (a single month or single client, see above),
the report will be sorted by name (or month) in ascending alphabetical
order by default. You can change the sort order to descending or sort
by number of postings instead.

By default, output is sorted alphabetically:

    # ----- 2012-01:
    40tude_Dialog:  5873
    Forte Agent  :  7735
    Thunderbird  : 20925

Using B<--order-by> I<postings-desc>, it will be sorted by number of
postings instead, from most to least postings:

    # ----- 2012-01:
    Thunderbird  : 20925
    Forte Agent  :  7735
    40tude_Dialog:  5873

=item B<-f>, B<--format> I<pretty|list|dump>

Select the output format, I<pretty> (a kind of table) being the default:

    # ----- 2012-01:
    40tude_Dialog:  5873
    Forte Agent  :  7735
    # ----- 2012-02:
    40tude_Dialog:  4142
    Forte Agent  :  5895

I<list> format looks like this (each client preceded by month):

    2012-01 40tude_Dialog 5873
    2012-01 Forte Agent 7735
    2012-02 40tude_Dialog 4142
    2012-02 Forte Agent 5895

And I<dump> format looks like this:

    # 2012-01:
    40tude_Dialog 5873
    Forte Agent 7735
    # 2012-02:
    40tude_Dialog 4142
    Forte Agent 5895

You can remove the comments (lines after '#') by using B<--nocomments>,
see below.

=item B<-c>, B<--captions|--nocaptions>

Add captions to output, like this:

    # ----- Report for 2012-01 to 2012-02 (number of postings for each month)
    # ----- Names: Thunderbird
    # ----- Threshold: 8000 => x   (counting only months fulfilling this condition)
    # ----- Grouped by Months (ascending), sorted by number of postings descending

False by default.

=item B<--comments|--nocomments>

Add comments (group headers) to I<dump> and I<pretty> output. True by
default as long as B<--filetemplate> is not set.

Use I<--nocomments> to suppress anything except client names or months
and numbers of postings.

=item B<--filetemplate> I<filename template>

Save output to file(s) instead of dumping it to STDOUT. B<clientstats>
will create one file for each month (or each client, according to the
setting of B<--group-by>, see above), with filenames composed by adding
year and month (or client names) to the I<filename template>, for
example with B<--filetemplate> I<stats>:

    stats-2012-01
    stats-2012-02
    ... and so on

=item B<--db> I<database table>

Override I<DBTableClnts> from F<newsstats.conf>.

=item B<--conffile> I<filename>

Read configuration from I<filename> instead of F<newsstats.conf>.

=back

=head1 INSTALLATION

See L<doc/INSTALL>.

=head1 EXAMPLES

Show number of postings per client for last month in I<pretty> format:

    clientstats 

Show that report for January of 2010 and Thunderbird plus Ice*:

    clientstats --month 2010-01 --names 'Thunderbird:Ice*'

Only show clients with at least 30 postings last month and the versions
of those clients, ordered each by number of postings, descending,
in I<pretty> format:

    clientstats --lower 30 --versions --order-by postings-desc

List number of postings per client for each month of 2010 and redirect
output to one file for each month, named hosts-2010-01 and so on, in
machine-readable form (without formatting):

    clientstats -m 2010-01:2010-12 -f dump --filetemplate hosts

=head1 FILES

=over 4

=item F<bin/clientstats.pl>

The script itself.

=item F<lib/NewsStats.pm>

Library functions for the NewsStats package.

=item F<etc/newsstats.conf>

Runtime configuration file.

=back

=head1 BUGS

Please report any bugs or feature requests to the author or use the
bug tracker at L<https://code.virtcomm.de/thh/newsstats/issues>!

=head1 SEE ALSO

=over 2

=item -

L<doc/README>

=item -

L<doc/INSTALL>

=item -

gatherstats -h

=back

This script is part of the B<NewsStats> package.

=head1 AUTHOR

Thomas Hochstein <thh@thh.name>

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2025 Thomas Hochstein <thh@thh.name>

This program is free software; you may redistribute it and/or modify it
under the same terms as Perl itself.

=cut
