File Coverage

File:blib/lib/Simba/CA.pm
Coverage:17.6%

linestmtbrancondsubpodtimecode
1#!/usr/bin/perl
2
3 - 55
=head1 NAME

Simba::CA

=head1 DESCRIPTION

Collecting Agent of the Simba backup system.

This class represents one instance of a running collecting agent.
The only user-callable methods are the constructor new and the instance 
method run, which collects all the files from various disk agents.

The Simba::CA package is a hashref with the following keys:

=over

=item basedir

=item unknown_uid

=item unknown_gid

=item fh_log

=item log_level

=item dbh

=item targets

=item ssh_id_file

=item target

=item last_backup

=item last_backup_id

=item timestamp

=item this_backup

=item session_id

=item file_pid

=item file_cfd

=item file_dfd

=back

=cut
56
57
58package Simba::CA;
59
2
2
2
78330
6
85
use strict;
60
2
2
2
8
2
93
use warnings;
61
62
2
2
2
838
16951
160
use Encode;
63
2
2
2
541
7809
117
use IPC::Open2;
64
2
2
2
530
10841
32
use POSIX qw(strftime);
65
2
2
2
4946
4
105
use Simba::Util qw(quote unquote typestr);
66
2
2
2
837
7553
125
use Readonly;
67
2
2
2
807
4698
102
use Digest::SHA;
68
2
2
2
12
2
109
use List::Util qw(min);
69
2
2
2
628
8327
99
use IO::Handle;
70
2
2
2
379
6306
37
use File::stat;
71
2
2
2
151
24
123
use Scalar::Util qw(tainted);
72
2
2
2
19124
34552
184
use DBI;
73
2
2
2
1533
2491
43
use Time::HiRes qw(gettimeofday);
74
75Readonly my $BUFSIZE => 128 * 1024;
76
77sub new {
78
3
0
346095
    my ($class, $opt) = @_;
79
80
3
9
    my $self = {};
81
3
16
    bless $self, $class;
82
83
3
10
    $self->{basedir} = '/backup';
84
3
7
    $self->{unknown_uid} = 65533;
85
3
7
    $self->{unknown_gid} = 65533;
86
3
15
    $self->{fh_log} = exists($opt->{fh_log}) ? $opt->{fh_log} : \*STDERR;
87
3
6
    $self->{log_level} = 99;
88
3
15
    if ($opt->{dbi}) {
89
0
0
0
0
        $self->{dbh} = DBI->connect(@{ $opt->{dbi} },
90                                       { AutoCommit => 0,
91                                         PrintError => 1,
92                                         RaiseError => 1
93                                       }
94                           );
95    } elsif ($opt->{dbi_file}) {
96
3
5
        my $fn = $opt->{dbi_file};
97
3
20959
        open(FN, "<$fn") or die "cannot open $fn: $!";
98
3
7849
        my $line = <FN>;
99
3
26
        close(FN);
100
3
41
        my @cred = split(/[\s\n]+/, $line);
101
3
88
        $self->{dbh} = DBI->connect(@cred,
102                                       { AutoCommit => 0,
103                                         PrintError => 1,
104                                         RaiseError => 1
105                                       }
106                           );
107    } elsif ($opt->{tokyocabinet}) {
108
0
0
        my $tdb = $self->{tdb} = TokyoCabinet::TDB->new();
109
0
0
        $tdb->open($opt->{tokyocabinet}, $tdb->WRITER, $tdb->OCREAT)
110            or die "open $opt->{tokyocabinet} failed: " . $tdb->errmsg($tdb->ecode());
111    }
112    # XXX - DBI
113
3
58496
    $self->{targets} = $self->{dbh}->selectall_arrayref("select * from filesets", { Slice => {} });
114
3
22271
    if ($opt->{filesets}) {
115
0
0
        $self->{targets} =
116            [
117                grep {
118
0
0
                    my $id = $_->{id};
119
0
0
0
0
0
0
                    grep { $id == $_ } @{ $opt->{filesets} }
120
0
0
                } @{ $self->{targets} }
121            ];
122    }
123
3
21
    if ($ENV{HOME} =~ m{([/\w]*)}) {
124
3
51
        if (-f "$1/.ssh/id_rsa") {
125
3
54
            if (my $st = stat("$1/.ssh/id_rsa")) {
126
3
441
                if ($st->uid == $>) {
127
3
80
                    $self->{ssh_id_file} = "$1/.ssh/id_rsa";
128                }
129            }
130        }
131    }
132
133
3
26
    return $self;
134}
135
136sub run {
137
0
0
0
    my ($self) = @_;
138
139    # run sequentially for prototype. In production we probably
140    # want some concurrency
141
0
0
0
0
    for my $target (@{$self->{targets}}) {
142
0
0
        $self->backup2disk($target);
143    }
144
0
0
    $self->log(3, "statistics:");
145
0
0
0
0
    for (sort keys %{ $self->{counts} }) {
146
0
0
        $self->log(3, " $_: $self->{counts}{$_}");
147    }
148
0
0
0
0
    for (sort keys %{ $self->{times} }) {
149
0
0
        $self->log(3, " $_: $self->{times}{$_} s");
150    }
151}
152
153sub backup2disk {
154
0
0
0
    my ($self, $target) = @_;
155
156
0
0
    $self->reserve_fileset($target);
157
158
0
0
    $self->log(3, "starting backup for target host " . $target->{host} . " dir " . $target->{dir});
159
0
0
    $self->{target} = $target;
160
161    # get previous generation
162
0
0
    $self->get_last_session();
163
164
0
0
    my $timestamp = $self->{timestamp} || strftime('%Y-%m-%dT%H.%M.%S', localtime);
165
0
0
    $self->{this_backup} = $self->{basedir} . "/$timestamp/" . $target->{host} . '/' . $target->{dir};
166
0
0
    $self->new_session();
167
168
0
0
    my ($list_pid, $list_cfd, $list_dfd); # connection to get list of files
169
0
0
    $list_pid = open2($list_dfd, $list_cfd,
170                      "/usr/bin/ssh",
171                      "-l", "simba_da",
172                      $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
173                      $target->{host}, "da");
174
0
0
    $list_cfd->printflush("list $target->{dir}\n"); # XXX - encode!
175
0
0
    close($list_cfd);
176
0
0
    my $count = 0;
177
0
0
    while (<$list_dfd>) {
178
0
0
        $count++;
179
0
0
        chomp;
180
0
0
        $self->log(10, "file: $_");
181        # split into fields
182
0
0
        chomp;
183
0
0
        my $f = $self->parse($_);
184
0
0
        if ($count % 1000 == 0) {
185
0
0
            $self->log(9, "file $count: $f->{name}");
186        }
187
188
0
0
        my $success = 1;
189
0
0
        if ($f->{t} eq 'f') {
190
0
0
            $success = $self->store_file($f);
191        } elsif ($f->{t} eq 'd') {
192
0
0
                my $d = "$self->{this_backup}/$f->{name}";
193
0
0
                $d =~ s,//+,/,g;
194
0
0
                mkdir_p($d) or die "cannot mkdir $d: $!"; # XXX
195
0
0
                $self->setmeta($f);
196        } elsif ($f->{t} eq 'l') {
197
0
0
                my $l = "$self->{this_backup}/$f->{name}";
198
0
0
                unless (symlink($f->{lt}, $l)) {
199
0
0
                    die "cannot symlink $l -> $f->{lt}: $!"; # XXX
200                }
201                # $self->setmeta($f); ignore for symlinks. would need to use
202                # lchown, lchmod, etc.
203        } else {
204            # create local copy (or insert into DB only?)
205
0
0
            $self->log(5, "ignored $_\n");
206        }
207        # insert into DB.
208
0
0
        $self->db_record_version($target, $f) if ($success);
209    }
210
0
0
    $self->flush_insert_instances();
211
0
0
    $self->close_session();
212
0
0
    $self->release_fileset($target);
213
0
0
    $self->log(3, "finished backup for target host " . $target->{host} . " dir " . $target->{dir} . ": $count files");
214
0
0
    $self->{counts}{objects} += $count;
215}
216
217sub reserve_fileset {
218
0
0
0
    my ($self, $target) = @_;
219
220
0
0
    for (;;) {
221
0
0
        my $rows = $self->{dbh}->do(q{update filesets set pid=? where id = ? and pid is null}, {}, $$, $target->{id});
222
0
0
        return if $rows == 1;
223
0
0
        my $pid = $self->{dbh}->selectrow_array(q{select pid from filesets where id = ?}, {}, $target->{id});
224
0
0
        $self->log(3, "fileset $target->{id} appears to be in use by pid $pid");
225
2
2
2
0
3465
2173
8269
0
        if (!kill(0, $pid) && $!{ESRCH}) {
226
0
0
            $self->log(3, "pid $pid doesn't exist, trying to release fileset $target->{id}");
227
0
0
            $self->{dbh}->do(q{update filesets set pid=null where id = ? and pid=?}, {}, $target->{id}, $pid);
228        }
229
0
0
        sleep 60;
230    }
231}
232
233
234sub release_fileset {
235
0
0
0
    my ($self, $target) = @_;
236
237
0
0
    $self->log(3, "releasing fileset $target->{id}");
238
0
0
    $self->{dbh}->do(q{update filesets set pid=null where id=? and pid=?}, {}, $target->{id}, $$);
239}
240
241
242sub parse {
243
0
0
0
    my ($self, $s) = @_;
244
245
0
0
    my @s = split(/ +/, $s);
246
0
0
    my $f = {};
247
0
0
    $f->{name} = shift @s;
248
0
0
    $f->{name} = $1 if ($f->{name} =~ /(.*)/); # detaint XXX
249
0
0
    for (@s) {
250
0
0
        my ($k, $v) = split(/=/, $_, 2);
251
0
0
        $f->{$k} = $v;
252        # special processing for permissions etc, here?
253    }
254
0
0
    $f->{o} = unquote($f->{o});
255
0
0
    $f->{g} = unquote($f->{g});
256
0
0
    $f->{acl} = unquote($f->{acl});
257
0
0
    $f->{m} = $1 if $f->{m} =~ /^(\d+)$/;
258
0
0
    $f->{lt} = unquote($1) if defined $f->{lt} && $f->{lt} =~ /(.*)/;
259
0
0
    return $f;
260
261}
262
263sub present {
264
0
0
0
    my ($self, $f) = @_;
265
0
0
    return unless $self->{last_backup};
266
0
0
    my $st = lstat("$self->{last_backup}/$f->{name}");
267
0
0
    return unless $st;
268
0
0
    if ($st->mtime == $f->{m} &&
269        $st->size == $f->{s} &&
270        $st->uid == $self->name2uid($f->{o}) &&
271        $st->gid == $self->name2gid($f->{g}) &&
272        ($st->mode & 07777) == $self->acl2mode($f)
273    ) {
274
0
0
        return 1;
275    } else {
276
0
0
        return 0;
277    }
278}
279
280sub mkdir_p {
281
0
0
0
    my ($dir, $perm) = @_;
282
0
0
    $perm = 0777 unless(defined($perm));
283
284
0
0
    if (-d $dir) {
285
0
0
        return 1;
286    } elsif (mkdir($dir, $perm)) {
287
0
0
        return 1;
288    } elsif ($!{ENOENT}) {
289
0
0
        my $parentdir = $dir;
290
0
0
        $parentdir =~ s|(.*)/.+|$1|;
291
0
0
        mkdir_p($parentdir, $perm);
292
0
0
        if (-d $dir) {
293
0
0
            return 1;
294        } else {
295
0
0
            return mkdir($dir, $perm);
296        }
297    } else {
298
0
0
        return undef;
299    }
300}
301
302sub basedir {
303
0
1
0
    my ($self, $dir) = @_;
304
0
0
    $self->{basedir} = $dir if defined($dir);
305
0
0
    return $self->{basedir};
306}
307
308sub targets {
309
0
1
0
    my ($self, $targets) = @_;
310
0
0
    $self->{targets} = $targets if defined($targets);
311
0
0
    return $self->{targets};
312}
313
314sub add_target {
315
0
0
0
    my ($self, $target) = @_;
316
0
0
0
0
    push @{ $self->{targets} }, $target;
317
0
0
    return $self->{targets};
318}
319
320my %permstrbits = (
321    '---' => 0,
322    '--x' => 1,
323    '-w-' => 2,
324    '-wx' => 3,
325    'r--' => 4,
326    'r-x' => 5,
327    'rw-' => 6,
328    'rwx' => 7,
329);
330
331sub setmeta {
332
0
0
0
    my ($self, $f) = @_;
333
0
0
    my $fn = "$self->{this_backup}/$f->{name}";
334
0
0
    $self->log(3, "$fn is tainted!") if tainted($fn);
335
0
0
    my $mode = $self->acl2mode($f);
336
0
0
    $self->log(3, "$mode is tainted!") if tainted($mode);
337
0
0
    chown($self->name2uid($f->{o}), $self->name2gid($f->{g}), $fn);
338
0
0
    chmod($mode, $fn);
339
0
0
    utime(time, $f->{m}, $fn);
340}
341
342# computes the mode from the acl (and the set[ug]id and sticky bits)
343# and returns it. Optional ACL entries are currently ignored but should
344# eventually be returned as a second value.
345
346sub acl2mode {
347
0
0
0
    my ($self, $f) = @_;
348
349
0
0
    my $mode = 0;
350
0
0
    if ($f->{acl}) {
351
0
0
        for my $ace (split(',', $f->{acl})) {
352
0
0
            if ($ace =~ /^u::(...)$/) {
353
0
0
                $mode |= ($permstrbits{$1} << 6);
354            } elsif ($ace =~ /^g::(...)$/) {
355
0
0
                $mode |= ($permstrbits{$1} << 3);
356            } elsif ($ace =~ /^o:(...)$/) {
357
0
0
                $mode |= ($permstrbits{$1} << 0);
358            } else {
359
0
0
                $self->log(5, "warning: unknown ACE $ace ignored");
360            }
361        }
362    }
363
0
0
0
0
    if ($f->{setuid}) { $mode |= 04000 }
364
0
0
0
0
    if ($f->{setgid}) { $mode |= 02000 }
365
0
0
0
0
    if ($f->{sticky}) { $mode |= 01000 }
366
367
0
0
    return $mode;
368}
369
370my %ucache;
371sub name2uid {
372
5
0
3652
    my ($self, $uname) = @_;
373
5
28
    $uname = $1 if $uname =~ /(.*)/; # detaint
374
5
13
    return $ucache{$uname} if (defined $ucache{$uname});
375
5
16
    if ($uname =~ /^\d+$/) {
376
1
9
        return $ucache{$uname} = $uname;
377    } else {
378
4
575
        my $uid = getpwnam($uname);
379
4
13
        if (defined($uid)) {
380
3
35
            return $ucache{$uname} = $uid;
381        } else {
382
1
12
            return $ucache{$uname} = $self->{unknown_uid};
383        }
384    }
385}
386
387my %gcache;
388sub name2gid {
389
1
0
732
    my ($self, $gname) = @_;
390
1
7
    $gname = $1 if $gname =~ /(.*)/; # detaint
391
1
3
    return $gcache{$gname} if (defined $gcache{$gname});
392
1
4
    if ($gname =~ /^\d+$/) {
393
0
0
        return $gcache{$gname} = $gname;
394    } else {
395
1
104
        my $gid = getgrnam($gname);
396
1
5
        if (defined($gid)) {
397
1
12
            return $gcache{$gname} = $gid;
398        } else {
399
0
0
            return $gcache{$gname} = $self->{unknown_gid};
400        }
401    }
402}
403
404# currently used log levels:
405# 0 - fatal error, backup failed
406# 3 - global progress messages, like start and end of a backup, statistics.
407# 5 - errors which prevent a file from being backed up
408# 10 - progress messages for single files.
409sub log {
410
0
0
0
    my ($self, $level, $msg) = @_;
411
0
0
    if ($level <= $self->{log_level}) {
412
0
0
        $self->{fh_log}->print(strftime("%Y-%m-%dT%H:%M:%S%z", localtime), " $$ [$level]: $msg\n")
413            or die "write to log failed: $!";
414    }
415}
416
417sub log_level {
418
0
1
0
    my ($self, $log_level) = @_;
419
0
0
    $self->{log_level} = $log_level if defined($log_level);
420
0
0
    return $self->{log_level};
421}
422
423sub db_record_version {
424
0
0
0
    my ($self, $target, $f) = @_;
425
426
0
0
    my $t0 = gettimeofday();
427
428
0
0
    my $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
429                                                { Slice => {} },
430                                                $target->{id}, $f->{name});
431
0
0
    my $t1 = gettimeofday();
432
0
0
    $self->{times}{db_record_version_select_files} += $t1 - $t0;
433
0
0
    unless (@$db_f) {
434
0
0
        $self->{dbh}->do("insert into files(fileset, path) values(?, ?)",
435                         {},
436                         $target->{id}, $f->{name});
437
0
0
        $db_f = $self->{dbh}->selectall_arrayref("select * from files where fileset=? and path=?",
438                                                    { Slice => {} },
439                                                    $target->{id}, $f->{name});
440
441    }
442
0
0
    my $t2 = gettimeofday();
443
0
0
    $self->{times}{db_record_version_insert_files} += $t2 - $t1;
444
0
0
    if ($f->{t} eq 'f' && !defined($f->{checksum})) {
445        # this must be a link to the previous version
446
0
0
        $f->{checksum} = $self->get_checksum($db_f->[0]{id});
447    }
448
0
0
    my $t2a = gettimeofday();
449
0
0
    $self->{times}{db_record_version_versions2_get_checksum} += $t2a - $t2;
450
451
0
0
    my $query =
452        "select id from versions2
453         where
454            file_type=? and file_size=? and file_mtime=? and
455            file_owner=? and file_group=? and file_acl=? and
456            file_unix_bits=?
457        ";
458
0
0
    my @args = (
459        $f->{t}, $f->{s}, $f->{m},
460        $f->{o}, $f->{g}, $f->{acl},
461
0
0
        join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
462    );
463
0
0
    if ($f->{t} eq 'f') {
464
0
0
        $query .= " and checksum=?";
465
0
0
        push @args, $f->{checksum};
466    } elsif ($f->{t} eq 'l') {
467
0
0
        $query .= " and file_linktarget=?";
468
0
0
        push @args, $f->{lt};
469    } elsif ($f->{t} eq 'b' || $f->{t} eq 'c') {
470
0
0
        $query .= " and file_rdev=?";
471
0
0
        push @args, $f->{rdev};
472    }
473
474
0
0
    my $version_id = $self->{dbh}->selectrow_array($query, {}, @args);
475
0
0
    my $t2b = gettimeofday();
476
0
0
    $self->{times}{db_record_version_versions2_get_version_id} += $t2b - $t2a;
477
0
0
    unless ($version_id) {
478        # XXX why is $f->{checksum} undef here for ./bin/dash?
479
0
0
        $self->{dbh}->do("insert into versions2(
480                                                file_type, file_size, file_mtime,
481                                                file_owner, file_group, file_acl,
482                                                file_unix_bits,
483                                                file_rdev,
484                                                checksum, file_linktarget
485                                               )
486                          values(
487                                 ?, ?, ?,
488                                 ?, ?, ?,
489                                 ?,
490                                 ?,
491                                 ?, ?
492                                )",
493                         {},
494                         $f->{t}, $f->{s}, $f->{m},
495                         $f->{o}, $f->{g}, $f->{acl},
496
0
0
                         join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
497                         $f->{rdev},
498                         $f->{checksum}, $f->{lt},
499                        );
500
0
0
        $version_id = $self->{dbh}->{mysql_insertid};
501    }
502
0
0
    my $t3 = gettimeofday();
503
0
0
    $self->{times}{db_record_version_versions2} += $t3 - $t2;
504
0
0
    $self->{times}{db_record_version_versions2_insert} += $t3 - $t2b;
505
0
0
0
0
    push @{ $self->{caches}{insert_instances} },
506         [
507             $db_f->[0]{id},
508             $f->{id},
509             time(), 1,
510             $self->{session_id}, $version_id
511         ];
512
0
0
0
0
    if (@{ $self->{caches}{insert_instances} } > 10) {
513
0
0
        $self->flush_insert_instances();
514    }
515
516
0
0
    my $t4 = gettimeofday();
517
0
0
    $self->{times}{db_record_version_insert_instances} += $t4 - $t3;
518
0
0
    $self->{times}{db_record_version} += $t4 - $t0;
519}
520
521sub get_checksum {
522
0
0
0
    my ($self, $file) = @_;
523
0
0
    if ($self->{caches}{file_checksums}{$self->{last_backup_id}}) {
524
0
0
        return $self->{caches}{file_checksums}{$self->{last_backup_id}}{$file};
525    } else {
526
0
0
        $self->{caches}{file_checksums}{$self->{last_backup_id}} = my $file_checksums = {};
527
0
0
        my $sth = $self->{dbh}->prepare(
528                                    "select file, checksum from versions2 v, instances i
529                                     where session=? and v.id = i.version");
530
0
0
        $sth->execute($self->{last_backup_id});
531
0
0
        while (my ($file, $checksum) = $sth->fetchrow_array) {
532
0
0
            $file_checksums->{$file} = $checksum;
533        }
534
535
0
0
        return $file_checksums->{$file};
536    }
537}
538
539sub flush_insert_instances {
540
0
0
0
    my ($self) = @_;
541
0
0
    my $dbh = $self->{dbh};
542
543
0
0
0
0
    if (@{ $self->{caches}{insert_instances} }) {
544
0
0
        my $cmd = "insert into instances(file, file_id, date, online, session, version)"
545            . " values "
546            . join(", ",
547                   map {
548
0
0
                         "("
549
0
0
                         . join(",", map { $dbh->quote($_) } @$_)
550                         . ")"
551
0
0
                       } @{ $self->{caches}{insert_instances} }
552                  );
553
0
0
        $dbh->do($cmd);
554    }
555
0
0
    $dbh->commit();
556
0
0
    $self->{caches}{insert_instances} = [];
557}
558
559
560sub new_session {
561
0
0
0
    my ($self) = @_;
562
0
0
    $self->{dbh}->do("insert into sessions(start_date, prefix) values(?, ?)", {}, time(), $self->{this_backup});
563
0
0
    $self->{session_id} = $self->{dbh}->{'mysql_insertid'};
564}
565
566sub close_session {
567
0
0
0
    my ($self) = @_;
568
0
0
    $self->{dbh}->do("update sessions set end_date=? where id=?", {}, time(), $self->{session_id});
569
0
0
    $self->close_file_connection;
570
0
0
    delete $self->{target};
571}
572
573sub close_file_connection {
574
0
0
0
    my ($self) = @_;
575
0
0
    if ($self->{file_pid}) {
576
0
0
        close($self->{file_cfd});
577
0
0
        close($self->{file_dfd});
578
579
0
0
        $self->log(3, "waiting for $self->{file_pid}");
580
0
0
        waitpid $self->{file_pid}, 0;
581
0
0
        $self->log(3, "$self->{file_pid} terminated with status $?");
582
0
0
        delete $self->{file_cfd};
583
0
0
        delete $self->{file_dfd};
584
0
0
        delete $self->{file_pid};
585    }
586}
587
588sub get_last_session {
589
0
0
0
    my ($self) = @_;
590
0
0
    my $sessions
591        = $self->{dbh}->selectall_arrayref(
592            "select * from sessions where end_date is not null and prefix like ? order by end_date desc",
593            { Slice => {} },
594            $self->{basedir} . '/%/' . $self->{target}->{host} . '/' . $self->{target}->{dir}
595          );
596
0
0
    $self->{last_backup} = $sessions->[0]{prefix};
597
0
0
    $self->{last_backup_id} = $sessions->[0]{id};
598}
599
600 - 611
=head2 linkdup

Try to find a duplicate of the current file in the database and replace the 
current file with a hardlink to it. This is useful if you
have multiple copies of a file stored in different locations.
The search starts from the newest session and continues into the past until
either linking successful or we run out of duplicates.
This is done because creating a hard link may not always be possible (duplicate is 
on a different file system or has already reached the maximum link count)
and it is more likely that we can link to new copies than to old ones.

=cut
612
613sub linkdup {
614
0
1
0
    my ($self, $f, $backup_filename) = @_;
615
0
0
    my $t0 = gettimeofday();
616    # XXX - this seems to be slow
617    # XXX - creates huge temp files. Restrict to last few sessions or at least sessions on the same device?
618    # XXX - that's not quite as simple: We only have the prefix, but there are many prefixes on the same
619    # device. We can create a list of them of them at first call, though and then pass the list
620    # to the query. Maybe even shorten the list. ($n newest sessions only)
621    # XXX - another possible optimization is to check the last few files we've written: .svn/prop-base
622    # normally contains a lot of identical files.
623
624
0
0
    unless ($self->{sessions_on_same_device}) {
625
0
0
        my $st = stat($backup_filename);
626
0
0
        my $my_dev = defined $st ? $st->dev : ""; # can this happen?
627
0
0
        my $sth = $self->{dbh}->prepare("select * from sessions order by id desc");
628
0
0
        $sth->execute();
629
0
0
        while (my $r = $sth->fetchrow_hashref()) {
630
0
0
            my $st = lstat $r->{prefix};
631
0
0
            my $dev = defined $st ? $st->dev : "";;
632
0
0
            next unless $dev eq $my_dev;
633
0
0
0
0
            last if $self->{sessions_on_same_device} && @{ $self->{sessions_on_same_device} } > 30;
634
0
0
0
0
            push @{ $self->{sessions_on_same_device} }, $r;
635        }
636
637    }
638
0
0
    my $sth = $self->{dbh}->prepare("select * from versions2, instances, files, sessions
639                                     where file_type=? and file_size=? and file_mtime=?
640                                       and file_owner=? and file_group=? and file_acl=?
641                                       and file_unix_bits=?
642                                       and checksum=? and online=1
643                                       and instances.file=files.id
644                                       and versions2.id=instances.version
645                                       and instances.session=sessions.id
646
0
0
                                       and sessions.id in (" . join(", ", map("?", @{ $self->{sessions_on_same_device} })) . ")" .
647                                    " order by instances.session desc
648                                    ");
649
0
0
    $sth->execute(
650             $f->{t}, $f->{s}, $f->{m},
651             $f->{o}, $f->{g}, $f->{acl},
652
0
0
             join(',', map {$f->{$_} ? ($_) : ()} qw(setuid setgid sticky)),
653             $f->{checksum},
654
0
0
             @{ $self->{sessions_on_same_device} },
655          );
656
0
0
    my $st = stat($backup_filename);
657
0
0
    my $my_dev = defined $st ? $st->dev : "";
658
0
0
    while (my $r = $sth->fetchrow_hashref()) {
659
660        # check if old file is on same device. If it isn't, skip it.
661        # XXX - this should now be obsolete because we already selected only matching sessions above.
662
0
0
        unless ($self->{prefix_device}{$r->{prefix}}) {
663
0
0
            my $st = lstat $r->{prefix};
664
0
0
            $self->{prefix_device}{$r->{prefix}}
665                = defined $st ? $st->dev : "";
666        }
667
0
0
        next unless $self->{prefix_device}{$r->{prefix}} eq $my_dev;
668
669
0
0
        my $oldfile = "$r->{prefix}/$r->{path}";
670
0
0
        if (my $st = lstat($oldfile)) {
671
0
0
            if ($st->mtime == $f->{m} &&
672                $st->size == $f->{s} &&
673                $st->uid == $self->name2uid($f->{o}) &&
674                $st->gid == $self->name2gid($f->{g}) &&
675                ($st->mode & 07777) == $self->acl2mode($f)
676            ) {
677
0
0
                my ($dirname, $basename) = $backup_filename =~ m{(.*)/(.*)};
678
0
0
                my $tmpname = "$basename.$$.simba_backup";
679
0
0
                if (length($tmpname) > 255) {
680
0
0
                    $tmpname = substr($basename, 0, 235) . ".$$.simba_backup";
681                }
682
0
0
                $tmpname = "$dirname/$tmpname";
683
0
0
                rename($backup_filename, "$tmpname") or die "cannot save $backup_filename to $tmpname: $!";
684
0
0
                if (link($oldfile, $backup_filename)) {
685
0
0
                    $self->log(10, "linked (dup)");
686
0
0
                    unlink("$tmpname") or die "cannot unlink $tmpname: $!";
687
0
0
                    $sth->finish();
688
0
0
                    my $t1 = gettimeofday();
689
0
0
                    $self->{counts}{dup2}++;
690
0
0
                    $self->{times}{linkdup} += $t1 - $t0;
691
0
0
                    return $oldfile;
692                } else {
693
0
0
                    $self->log(5, "cannot link $oldfile to $backup_filename");
694
0
0
                    rename("$tmpname", $backup_filename) or die "cannot restore $backup_filename from $tmpname: $!";
695                }
696            }
697        }
698    }
699
0
0
    my $t1 = gettimeofday();
700
0
0
    $self->{times}{linkdup} += $t1 - $t0;
701
0
0
    return;
702}
703
704 - 712
=head2 store_file 

store a file in the local filesystem. If the file appears to be unchanged since
the last backup, try to create a hard link. Otherwise, get the contents of the
file from the DA, and search for a file with the same contents (i.e., checksum)
and metadata, but possibly different name and try to link to that. If no link
can be created to an existing file, create a new one.

=cut
713
714sub store_file {
715
0
1
0
    my ($self, $f) = @_;
716
717
0
0
    my $success = 1;
718
719
0
0
    if($self->present($f)) {
720
0
0
        if (link("$self->{last_backup}/$f->{name}", "$self->{this_backup}/$f->{name}")) {
721
0
0
            $self->log(10, "linked");
722
0
0
            $self->{counts}{dup1}++;
723
0
0
            return $success;
724        } else {
725
0
0
            $self->log(5, "cannot link $self->{last_backup}/$f->{name} to $self->{this_backup}/$f->{name}: $!");
726        }
727    }
728
729    # else request from da
730
0
0
    unless ($self->{file_pid}) {
731
0
0
        $self->{file_pid} = open2($self->{file_dfd}, $self->{file_cfd},
732                          "/usr/bin/ssh",
733                          "-l", "simba_da",
734                          $self->{ssh_id_file} ? ("-i", $self->{ssh_id_file}) : (),
735                          $self->{target}->{host}, "da");
736    }
737
0
0
    $self->{file_cfd}->printflush("get $self->{target}->{dir}/$f->{name}\n"); # XXX - encode!
738
0
0
    my $header = $self->{file_dfd}->getline; # this should be the same as $_ - check?
739
0
0
    if ($header =~ /^data (.*)/) {
740
0
0
        my $f2 = $self->parse($1);
741
0
0
        my $backup_filename = "$self->{this_backup}/$f->{name}";
742
0
0
        my $file_bfd;
743
0
0
        unless (open($file_bfd, '>:raw', $backup_filename)) {
744
0
0
            $self->log(5, "cannot open backup file $backup_filename: $!");
745            # XXX - There may be some errors from which we can recover, e.g., for
746            # "File name too long" we could just shorten the file name. But for
747            # now we just skip the file:
748            # XXX - some other errors are almost certainly fatal, e.g., ENOENT
749            # probably means that our backup device has been unmounted (BTDT).
750
0
0
            $self->close_file_connection;
751
0
0
            return 0;
752        }
753
0
0
        my $size = $f2->{s};
754
0
0
        my $err;
755
0
0
        my $sha1 = Digest::SHA->new(1);
756
757
0
0
        while ($size > 0) {
758
0
0
            my $buffer;
759
0
0
            my $rc = read($self->{file_dfd}, $buffer, min($size, $BUFSIZE));
760
0
0
            if (!defined($rc)) {
761                # I/O error
762
0
0
                $self->log(5, "error reading from data socket: $!");
763
0
0
                last;
764            } elsif ($rc == 0) {
765                # premature EOF.
766
0
0
                $self->log(5, "unexpected EOF reading from data socket");
767
0
0
                last;
768            }
769
0
0
            $file_bfd->print($buffer) or die "write to backup failed: $!";
770
0
0
            $size -= length($buffer);
771
0
0
            $sha1->add($buffer);
772        }
773
0
0
        close($file_bfd) or die "write to backup failed: $!";
774
0
0
        my $trailer = $self->{file_dfd}->getline; # should be empty line
775
0
0
        $trailer = $self->{file_dfd}->getline;
776
0
0
        if ($trailer =~ /^fail /) {
777
0
0
            $self->log(5, $trailer);
778
0
0
            $success = 0;
779        } elsif ($trailer =~ /^chk sha1 (\w+)/) {
780
0
0
            my $checksum = $sha1->hexdigest;
781
0
0
            if ($checksum ne $1) {
782
0
0
                $self->log(5, "checksum error\n");
783            }
784
0
0
            $f->{checksum} = $checksum;
785        } else {
786
0
0
            $self->log(5, "unexpected trailer $trailer\n");
787
0
0
            $self->close_file_connection;
788
0
0
            $success = 0;
789        }
790
0
0
        unless ($self->linkdup($f, $backup_filename)) {
791
0
0
            $self->setmeta($f);
792
0
0
            $self->log(10, "stored");
793        }
794    } else {
795
0
0
        $self->log(5, "unexpected header $header\n");
796
0
0
        $self->close_file_connection;
797
0
0
        $success = 0;
798    }
799
0
0
    return $success;
800}
801
802sub DESTROY {
803
3
2071
    my ($self) = @_;
804
3
491
    $self->{dbh}->disconnect();
805}
806
807
808# vim: tw=0 expandtab
8091;