| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| #include <config.h> |
|
|
| #include <ctype.h> |
| #include <getopt.h> |
| #include <sys/types.h> |
| #include <signal.h> |
|
|
| #include "system.h" |
|
|
| #include <regex.h> |
|
|
| #include "c-ctype.h" |
| #include "fd-reopen.h" |
| #include "quote.h" |
| #include "safe-read.h" |
| #include "stdio--.h" |
| #include "xdectoint.h" |
| #include "xstrtol.h" |
|
|
| |
/* Program name handed to the --version and ancillary-help machinery.  */
#define PROGRAM_NAME "csplit"

/* Author list handed to the --version handler in main.  */
#define AUTHORS \
  proper_name ("Stuart Kemp"), \
  proper_name ("David MacKenzie")

/* Output file name prefix used unless -f/--prefix overrides it.  */
#define DEFAULT_PREFIX "xx"
|
|
| |
/* One parsed PATTERN argument: either a line count or a regular
   expression, together with its optional repeat count.  */
struct control
{
  /* Signed line offset that followed the closing regexp delimiter;
     added to the matching line number to get the split point.  */
  intmax_t offset;

  /* For a numeric pattern, the line number given on the command line.  */
  intmax_t lines_required;

  /* Number of extra repetitions requested with {N}.  */
  intmax_t repeat;

  /* Index into the saved argv of the argument this record came from;
     used when reporting errors.  */
  int argnum;

  /* True when the repeat count was given as {*}.  */
  bool repeat_forever;

  /* True for %REGEXP% patterns: the section is skipped, not written.  */
  bool ignore;

  /* True when this record holds a regular expression.  */
  bool regexpr;

  /* Compiled form of the regular expression (valid when REGEXPR).  */
  struct re_pattern_buffer re_compiled;
};
|
|
| |
/* START_SIZE is the minimum data-area size requested for a new input
   buffer; CTRL_SIZE is the number of line entries held by each node
   of a buffer's line list.

   The DEBUG values are selected with #else rather than by defining
   the macros a second time: redefining an object-like macro with a
   different replacement list and no intervening #undef is a
   constraint violation.  */
#ifdef DEBUG
/* Deliberately small values; presumably meant to exercise buffer
   growth and line-node chaining with little input.  */
# define START_SIZE 200
# define CTRL_SIZE 1
#else
# define START_SIZE 8191
# define CTRL_SIZE 80
#endif
|
|
| |
| struct cstring |
| { |
| idx_t len; |
| char *str; |
| }; |
|
|
| |
| |
| struct line |
| { |
| idx_t used; |
| idx_t insert_index; |
| idx_t retrieve_index; |
| struct cstring starts[CTRL_SIZE]; |
| struct line *next; |
| }; |
|
|
| |
| |
| |
| struct buffer_record |
| { |
| idx_t bytes_alloc; |
| idx_t bytes_used; |
| intmax_t start_line; |
| intmax_t first_available; |
| idx_t num_lines; |
| char *buffer; |
| struct line *line_start; |
| struct line *curr_line; |
| struct buffer_record *next; |
| }; |
|
|
/* Forward declarations for functions used before they are defined.  */
static void create_output_file (void);
static void save_line_to_file (const struct cstring *line);
static void close_output_file (void);
static void delete_all_files (bool);
|
|
| |
| static struct buffer_record *head = nullptr; |
|
|
| |
| static char *hold_area = nullptr; |
|
|
| |
| static idx_t hold_count = 0; |
|
|
| |
| static intmax_t last_line_number = 0; |
|
|
| |
| static intmax_t current_line = 0; |
|
|
| |
| static bool have_read_eof = false; |
|
|
| |
| static char *volatile filename_space = nullptr; |
|
|
| |
| static char const *volatile prefix = nullptr; |
|
|
| |
| static char *volatile suffix = nullptr; |
|
|
| |
| static int volatile digits = 2; |
|
|
| |
| static int volatile files_created = 0; |
|
|
| |
| static intmax_t bytes_written; |
|
|
| |
| static FILE *output_stream = nullptr; |
|
|
| |
| static char *output_filename = nullptr; |
|
|
| |
| static char **global_argv; |
|
|
| |
| static bool suppress_count; |
|
|
| |
| static bool volatile remove_files; |
|
|
| |
| static bool elide_empty_files; |
|
|
| |
| static bool suppress_matched; |
|
|
| |
| |
| static struct control *controls; |
|
|
| |
| static idx_t control_used; |
|
|
| |
| static sigset_t caught_signals; |
|
|
| |
| |
/* Values returned by getopt_long for long options that have no
   single-character equivalent.  They start above CHAR_MAX so they
   cannot collide with any option character.  */
enum
  {
    SUPPRESS_MATCHED_OPTION = CHAR_MAX + 1
  };
|
|
| static struct option const longopts[] = |
| { |
| {"digits", required_argument, nullptr, 'n'}, |
| {"quiet", no_argument, nullptr, 'q'}, |
| {"silent", no_argument, nullptr, 's'}, |
| {"keep-files", no_argument, nullptr, 'k'}, |
| {"elide-empty-files", no_argument, nullptr, 'z'}, |
| {"prefix", required_argument, nullptr, 'f'}, |
| {"suffix-format", required_argument, nullptr, 'b'}, |
| {"suppress-matched", no_argument, nullptr, SUPPRESS_MATCHED_OPTION}, |
| {GETOPT_HELP_OPTION_DECL}, |
| {GETOPT_VERSION_OPTION_DECL}, |
| {nullptr, 0, nullptr, 0} |
| }; |
|
|
| |
| |
|
|
| static void |
| cleanup (void) |
| { |
| sigset_t oldset; |
|
|
| close_output_file (); |
|
|
| sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| delete_all_files (false); |
| sigprocmask (SIG_SETMASK, &oldset, nullptr); |
| } |
|
|
/* Run cleanup and terminate the program with a failure status.
   This function does not return.  */
static _Noreturn void
cleanup_fatal (void)
{
  cleanup ();

  exit (EXIT_FAILURE);
}
|
|
/* Out-of-memory hook, defined with external linkage.  Report the
   failure, then leave through cleanup_fatal so that the output files
   are cleaned up before exiting.  */
extern void
xalloc_die (void)
{
  error (0, 0, "%s", _("memory exhausted"));

  cleanup_fatal ();
}
|
|
/* Handler for the signals in 'caught_signals': delete the output
   files (quietly), restore the default disposition of SIG, and
   re-raise it.  */
static void
interrupt_handler (int sig)
{
  delete_all_files (true);

  signal (sig, SIG_DFL);

  /* SIG now has its default action but is still blocked while this
     handler runs (main installs the handler with the caught signals
     in sa_mask).  Raising it here makes the default action take
     effect once the handler returns and the block is lifted.  */
  raise (sig);
}
|
|
| |
| |
|
|
| static void |
| save_to_hold_area (char *start, idx_t num) |
| { |
| free (hold_area); |
| hold_area = start; |
| hold_count = num; |
| } |
|
|
| |
| |
|
|
| static idx_t |
| read_input (char *dest, idx_t max_n_bytes) |
| { |
| if (max_n_bytes == 0) |
| return 0; |
|
|
| ptrdiff_t bytes_read = safe_read (STDIN_FILENO, dest, max_n_bytes); |
|
|
| if (bytes_read == 0) |
| have_read_eof = true; |
|
|
| if (bytes_read < 0) |
| { |
| error (0, errno, _("read error")); |
| cleanup_fatal (); |
| } |
|
|
| return bytes_read; |
| } |
|
|
| |
|
|
| static void |
| clear_line_control (struct line *p) |
| { |
| p->used = 0; |
| p->insert_index = 0; |
| p->retrieve_index = 0; |
| } |
|
|
| |
|
|
| static struct line * |
| new_line_control (void) |
| { |
| struct line *p = xmalloc (sizeof *p); |
|
|
| p->next = nullptr; |
| clear_line_control (p); |
|
|
| return p; |
| } |
|
|
| |
| |
|
|
| static void |
| keep_new_line (struct buffer_record *b, char *line_start, idx_t line_len) |
| { |
| struct line *l; |
|
|
| |
| if (b->line_start == nullptr) |
| b->line_start = b->curr_line = new_line_control (); |
|
|
| |
| if (b->curr_line->used == CTRL_SIZE) |
| { |
| b->curr_line->next = new_line_control (); |
| b->curr_line = b->curr_line->next; |
| } |
|
|
| l = b->curr_line; |
|
|
| |
| l->starts[l->insert_index].str = line_start; |
| l->starts[l->insert_index].len = line_len; |
| l->used++; |
| l->insert_index++; |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| static idx_t |
| record_line_starts (struct buffer_record *b) |
| { |
| char *line_start; |
| idx_t lines; |
| idx_t line_length; |
|
|
| if (b->bytes_used == 0) |
| return 0; |
|
|
| lines = 0; |
| line_start = b->buffer; |
| char *buffer_end = line_start + b->bytes_used; |
| *buffer_end = '\n'; |
|
|
| while (true) |
| { |
| char *line_end = rawmemchr (line_start, '\n'); |
| if (line_end == buffer_end) |
| break; |
| line_length = line_end - line_start + 1; |
| keep_new_line (b, line_start, line_length); |
| line_start = line_end + 1; |
| lines++; |
| } |
|
|
| |
| idx_t bytes_left = buffer_end - line_start; |
| if (bytes_left) |
| { |
| if (have_read_eof) |
| { |
| keep_new_line (b, line_start, bytes_left); |
| lines++; |
| } |
| else |
| save_to_hold_area (ximemdup (line_start, bytes_left), bytes_left); |
| } |
|
|
| b->num_lines = lines; |
| b->first_available = b->start_line = last_line_number + 1; |
| last_line_number += lines; |
|
|
| return lines; |
| } |
|
|
| |
/* With GCC 13 and later, turn off three -fanalyzer diagnostics from
   this point to the end of the file.
   NOTE(review): presumably these are false positives triggered by the
   buffer-list code below -- confirm before removing.  */
#if 13 <= __GNUC__
# pragma GCC diagnostic ignored "-Wanalyzer-mismatching-deallocation"
# pragma GCC diagnostic ignored "-Wanalyzer-use-after-free"
# pragma GCC diagnostic ignored "-Wanalyzer-use-of-uninitialized-value"
#endif
|
|
| static void |
| free_buffer (struct buffer_record *buf) |
| { |
| for (struct line *l = buf->line_start; l;) |
| { |
| struct line *n = l->next; |
| free (l); |
| l = n; |
| } |
| free (buf->buffer); |
| free (buf); |
| } |
|
|
| |
|
|
| static ATTRIBUTE_DEALLOC (free_buffer, 1) |
| struct buffer_record * |
| get_new_buffer (idx_t min_size) |
| { |
| struct buffer_record *new_buffer = xmalloc (sizeof *new_buffer); |
| new_buffer->bytes_alloc = 0; |
| new_buffer->buffer = xpalloc (nullptr, &new_buffer->bytes_alloc, min_size, |
| -1, 1); |
| new_buffer->bytes_used = 0; |
| new_buffer->start_line = new_buffer->first_available = last_line_number + 1; |
| new_buffer->num_lines = 0; |
| new_buffer->line_start = new_buffer->curr_line = nullptr; |
| new_buffer->next = nullptr; |
|
|
| return new_buffer; |
| } |
|
|
| |
| |
|
|
| static void |
| save_buffer (struct buffer_record *buf) |
| { |
| struct buffer_record *p; |
|
|
| buf->next = nullptr; |
| buf->curr_line = buf->line_start; |
|
|
| if (head == nullptr) |
| head = buf; |
| else |
| { |
| for (p = head; p->next; p = p->next) |
| ; |
| p->next = buf; |
| } |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| static bool |
| load_buffer (void) |
| { |
| if (have_read_eof) |
| return false; |
|
|
| |
| |
| |
| idx_t bytes_wanted = MAX (START_SIZE, hold_count + 1); |
|
|
| while (true) |
| { |
| struct buffer_record *b = get_new_buffer (bytes_wanted); |
| idx_t bytes_alloc = b->bytes_alloc; |
| idx_t bytes_avail = bytes_alloc; |
| char *p = b->buffer; |
|
|
| |
| if (hold_count) |
| { |
| p = mempcpy (p, hold_area, hold_count); |
| b->bytes_used += hold_count; |
| bytes_avail -= hold_count; |
| hold_count = 0; |
| } |
|
|
| b->bytes_used += read_input (p, bytes_avail - 1); |
|
|
| if (record_line_starts (b) != 0) |
| { |
| save_buffer (b); |
| return true; |
| } |
|
|
| free_buffer (b); |
| if (have_read_eof) |
| return false; |
| if (ckd_add (&bytes_wanted, bytes_alloc, bytes_alloc >> 1)) |
| xalloc_die (); |
| } |
| } |
|
|
| |
| |
|
|
| static intmax_t |
| get_first_line_in_buffer (void) |
| { |
| if (head == nullptr && !load_buffer ()) |
| return 0; |
|
|
| return head->first_available; |
| } |
|
|
| |
| |
| |
|
|
| static struct cstring * |
| remove_line (void) |
| { |
| |
| |
| |
| static struct buffer_record *prev_buf = nullptr; |
|
|
| struct cstring *line; |
| struct line *l; |
|
|
| if (prev_buf) |
| { |
| free_buffer (prev_buf); |
| prev_buf = nullptr; |
| } |
|
|
| if (head == nullptr && !load_buffer ()) |
| return nullptr; |
|
|
| if (current_line < head->first_available) |
| current_line = head->first_available; |
|
|
| ++(head->first_available); |
|
|
| l = head->curr_line; |
|
|
| line = &l->starts[l->retrieve_index]; |
|
|
| |
| if (++l->retrieve_index == l->used) |
| { |
| |
| head->curr_line = l->next; |
| if (head->curr_line == nullptr || head->curr_line->used == 0) |
| { |
| |
| |
| |
| prev_buf = head; |
| head = head->next; |
| } |
| } |
|
|
| return line; |
| } |
|
|
| |
| |
|
|
| static struct cstring * |
| find_line (intmax_t linenum) |
| { |
| struct buffer_record *b; |
|
|
| if (head == nullptr && !load_buffer ()) |
| return nullptr; |
|
|
| if (linenum < head->start_line) |
| return nullptr; |
|
|
| for (b = head;;) |
| { |
| if (linenum < b->start_line + b->num_lines) |
| { |
| |
| struct line *l; |
| idx_t offset; |
|
|
| l = b->line_start; |
| offset = linenum - b->start_line; |
| |
| while (offset >= CTRL_SIZE) |
| { |
| l = l->next; |
| offset -= CTRL_SIZE; |
| } |
| return &l->starts[offset]; |
| } |
| if (b->next == nullptr && !load_buffer ()) |
| return nullptr; |
| b = b->next; |
| } |
| } |
|
|
| |
|
|
| static bool |
| no_more_lines (void) |
| { |
| return find_line (current_line + 1) == nullptr; |
| } |
|
|
| |
|
|
/* Arrange for the file NAME to be read through standard input.
   The name "-" means keep the existing standard input.  Failure to
   open NAME is reported and exits the program.  */
static void
set_input_file (char const *name)
{
  if (streq (name, "-"))
    return;

  if (fd_reopen (STDIN_FILENO, name, O_RDONLY, 0) < 0)
    error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
           quoteaf (name));
}
|
|
| |
| |
| |
| |
|
|
| static void |
| write_to_file (intmax_t last_line, bool ignore, int argnum) |
| { |
| struct cstring *line; |
| intmax_t first_line; |
| intmax_t lines; |
| intmax_t i; |
|
|
| first_line = get_first_line_in_buffer (); |
|
|
| if (! first_line || first_line > last_line) |
| { |
| error (0, 0, _("%s: line number out of range"), |
| quote (global_argv[argnum])); |
| cleanup_fatal (); |
| } |
|
|
| lines = last_line - first_line; |
|
|
| for (i = 0; i < lines; i++) |
| { |
| line = remove_line (); |
| if (line == nullptr) |
| { |
| error (0, 0, _("%s: line number out of range"), |
| quote (global_argv[argnum])); |
| cleanup_fatal (); |
| } |
| if (!ignore) |
| save_line_to_file (line); |
| } |
| } |
|
|
| |
|
|
| static void |
| dump_rest_of_file (void) |
| { |
| struct cstring *line; |
|
|
| while ((line = remove_line ()) != nullptr) |
| save_line_to_file (line); |
| } |
|
|
| |
| |
|
|
| static void |
| handle_line_error (const struct control *p, intmax_t repetition) |
| { |
| char buf[INT_BUFSIZE_BOUND (intmax_t)]; |
|
|
| fprintf (stderr, _("%s: %s: line number out of range"), |
| program_name, quote (imaxtostr (p->lines_required, buf))); |
| if (repetition) |
| fprintf (stderr, _(" on repetition %jd\n"), repetition); |
| else |
| fprintf (stderr, "\n"); |
|
|
| cleanup_fatal (); |
| } |
|
|
| |
| |
| |
| |
|
|
| static void |
| process_line_count (const struct control *p, intmax_t repetition) |
| { |
| intmax_t linenum; |
| intmax_t last_line_to_save = p->lines_required * (repetition + 1); |
|
|
| create_output_file (); |
|
|
| |
| |
| |
| if (no_more_lines () && suppress_matched) |
| handle_line_error (p, repetition); |
|
|
| if (!(linenum = get_first_line_in_buffer ())) |
| handle_line_error (p, repetition); |
|
|
| while (linenum++ < last_line_to_save) |
| { |
| struct cstring *line = remove_line (); |
| if (line == nullptr) |
| handle_line_error (p, repetition); |
| save_line_to_file (line); |
| } |
|
|
| close_output_file (); |
|
|
| if (suppress_matched) |
| remove_line (); |
|
|
| |
| |
| if (no_more_lines () && !suppress_matched) |
| handle_line_error (p, repetition); |
| } |
|
|
| static void |
| regexp_error (struct control *p, intmax_t repetition, bool ignore) |
| { |
| fprintf (stderr, _("%s: %s: match not found"), |
| program_name, quote (global_argv[p->argnum])); |
|
|
| if (repetition) |
| fprintf (stderr, _(" on repetition %jd\n"), repetition); |
| else |
| fprintf (stderr, "\n"); |
|
|
| if (!ignore) |
| { |
| dump_rest_of_file (); |
| close_output_file (); |
| } |
| cleanup_fatal (); |
| } |
|
|
| |
| |
| |
|
|
| static void |
| process_regexp (struct control *p, intmax_t repetition) |
| { |
| struct cstring *line; |
| idx_t line_len; |
| intmax_t break_line; |
| bool ignore = p->ignore; |
| regoff_t ret; |
|
|
| if (!ignore) |
| create_output_file (); |
|
|
| |
| |
|
|
| if (p->offset >= 0) |
| { |
| while (true) |
| { |
| line = find_line (++current_line); |
| if (line == nullptr) |
| { |
| if (p->repeat_forever) |
| { |
| if (!ignore) |
| { |
| dump_rest_of_file (); |
| close_output_file (); |
| } |
| exit (EXIT_SUCCESS); |
| } |
| else |
| regexp_error (p, repetition, ignore); |
| } |
| line_len = line->len; |
| if (line->str[line_len - 1] == '\n') |
| line_len--; |
| ret = re_search (&p->re_compiled, line->str, line_len, |
| 0, line_len, nullptr); |
| if (ret == -2) |
| { |
| error (0, 0, _("error in regular expression search")); |
| cleanup_fatal (); |
| } |
| if (ret == -1) |
| { |
| line = remove_line (); |
| if (!ignore) |
| save_line_to_file (line); |
| } |
| else |
| break; |
| } |
| } |
| else |
| { |
| |
| while (true) |
| { |
| line = find_line (++current_line); |
| if (line == nullptr) |
| { |
| if (p->repeat_forever) |
| { |
| if (!ignore) |
| { |
| dump_rest_of_file (); |
| close_output_file (); |
| } |
| exit (EXIT_SUCCESS); |
| } |
| else |
| regexp_error (p, repetition, ignore); |
| } |
| line_len = line->len; |
| if (line->str[line_len - 1] == '\n') |
| line_len--; |
| ret = re_search (&p->re_compiled, line->str, line_len, |
| 0, line_len, nullptr); |
| if (ret == -2) |
| { |
| error (0, 0, _("error in regular expression search")); |
| cleanup_fatal (); |
| } |
| if (ret != -1) |
| break; |
| } |
| } |
|
|
| |
| break_line = current_line + p->offset; |
|
|
| write_to_file (break_line, ignore, p->argnum); |
|
|
| if (!ignore) |
| close_output_file (); |
|
|
| if (p->offset > 0) |
| current_line = break_line; |
|
|
| if (suppress_matched) |
| remove_line (); |
| } |
|
|
| |
|
|
| static void |
| split_file (void) |
| { |
| for (idx_t i = 0; i < control_used; i++) |
| { |
| intmax_t j; |
| if (controls[i].regexpr) |
| { |
| for (j = 0; (controls[i].repeat_forever |
| || j <= controls[i].repeat); j++) |
| process_regexp (&controls[i], j); |
| } |
| else |
| { |
| for (j = 0; (controls[i].repeat_forever |
| || j <= controls[i].repeat); j++) |
| process_line_count (&controls[i], j); |
| } |
| } |
|
|
| create_output_file (); |
| dump_rest_of_file (); |
| close_output_file (); |
| } |
|
|
| |
| |
| |
| |
| |
| |
|
|
| static char * |
| make_filename (int num) |
| { |
| strcpy (filename_space, prefix); |
| if (suffix) |
| sprintf (filename_space + strlen (prefix), suffix, num); |
| else |
| sprintf (filename_space + strlen (prefix), "%0*d", digits, num); |
| return filename_space; |
| } |
|
|
| |
|
|
| static void |
| create_output_file (void) |
| { |
| int nfiles = files_created; |
| bool fopen_ok; |
| int fopen_errno; |
|
|
| output_filename = make_filename (nfiles); |
|
|
| if (nfiles == INT_MAX) |
| { |
| fopen_ok = false; |
| fopen_errno = EOVERFLOW; |
| } |
| else |
| { |
| |
| sigset_t oldset; |
| sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| output_stream = fopen (output_filename, "w"); |
| fopen_ok = (output_stream != nullptr); |
| fopen_errno = errno; |
| files_created = nfiles + fopen_ok; |
| sigprocmask (SIG_SETMASK, &oldset, nullptr); |
| } |
|
|
| if (! fopen_ok) |
| { |
| error (0, fopen_errno, "%s", quotef (output_filename)); |
| cleanup_fatal (); |
| } |
| bytes_written = 0; |
| } |
|
|
| |
| |
|
|
| static void |
| delete_all_files (bool in_signal_handler) |
| { |
| if (! remove_files) |
| return; |
|
|
| for (int i = files_created; 0 <= --i; ) |
| { |
| char const *name = make_filename (i); |
| if (unlink (name) != 0 && errno != ENOENT && !in_signal_handler) |
| error (0, errno, "%s", quotef (name)); |
| } |
|
|
| files_created = 0; |
| } |
|
|
| |
| |
|
|
| static void |
| close_output_file (void) |
| { |
| if (output_stream) |
| { |
| if (ferror (output_stream)) |
| { |
| error (0, 0, _("write error for %s"), quoteaf (output_filename)); |
| output_stream = nullptr; |
| cleanup_fatal (); |
| } |
| if (fclose (output_stream) != 0) |
| { |
| error (0, errno, "%s", quotef (output_filename)); |
| output_stream = nullptr; |
| cleanup_fatal (); |
| } |
| if (bytes_written == 0 && elide_empty_files) |
| { |
| sigset_t oldset; |
| bool unlink_ok; |
| int unlink_errno; |
|
|
| |
| sigprocmask (SIG_BLOCK, &caught_signals, &oldset); |
| unlink_ok = (unlink (output_filename) == 0); |
| unlink_errno = errno; |
| files_created--; |
| sigprocmask (SIG_SETMASK, &oldset, nullptr); |
|
|
| if (! unlink_ok && unlink_errno != ENOENT) |
| error (0, unlink_errno, "%s", quotef (output_filename)); |
| } |
| else |
| { |
| if (!suppress_count) |
| fprintf (stdout, "%jd\n", bytes_written); |
| } |
| output_stream = nullptr; |
| } |
| } |
|
|
| |
| |
|
|
| static void |
| save_line_to_file (const struct cstring *line) |
| { |
| idx_t l = fwrite (line->str, sizeof (char), line->len, output_stream); |
| if (l != line->len) |
| { |
| error (0, errno, _("write error for %s"), quoteaf (output_filename)); |
| output_stream = nullptr; |
| cleanup_fatal (); |
| } |
| bytes_written += line->len; |
| } |
|
|
| |
|
|
| static struct control * |
| new_control_record (void) |
| { |
| static idx_t control_allocated = 0; |
| struct control *p; |
|
|
| if (control_used == control_allocated) |
| controls = xpalloc (controls, &control_allocated, 1, -1, sizeof *controls); |
| p = &controls[control_used++]; |
| p->regexpr = false; |
| p->repeat = 0; |
| p->repeat_forever = false; |
| p->lines_required = 0; |
| p->offset = 0; |
| return p; |
| } |
|
|
| |
| |
| |
| |
|
|
| static void |
| check_for_offset (struct control *p, char const *str, char const *num) |
| { |
| if (xstrtoimax (num, nullptr, 10, &p->offset, "") != LONGINT_OK) |
| error (EXIT_FAILURE, 0, _("%s: integer expected after delimiter"), |
| quote (str)); |
| } |
|
|
| |
| |
| |
| |
|
|
| static void |
| parse_repeat_count (int argnum, struct control *p, char *str) |
| { |
| char *end; |
|
|
| end = str + strlen (str) - 1; |
| if (*end != '}') |
| error (EXIT_FAILURE, 0, _("%s: '}' is required in repeat count"), |
| quote (str)); |
| *end = '\0'; |
|
|
| if (str + 1 == end - 1 && *(str + 1) == '*') |
| p->repeat_forever = true; |
| else |
| { |
| uintmax_t val; |
| if (xstrtoumax (str + 1, nullptr, 10, &val, "") != LONGINT_OK |
| || ckd_add (&p->repeat, val, 0)) |
| { |
| error (EXIT_FAILURE, 0, |
| _("%s}: integer required between '{' and '}'"), |
| quote (global_argv[argnum])); |
| } |
| } |
|
|
| *end = '}'; |
| } |
|
|
| |
| |
| |
| |
| |
|
|
| static struct control * |
| extract_regexp (int argnum, bool ignore, char const *str) |
| { |
| idx_t len; |
| char delim = *str; |
| char const *closing_delim; |
| struct control *p; |
| char const *err; |
|
|
| closing_delim = strrchr (str + 1, delim); |
| if (closing_delim == nullptr) |
| error (EXIT_FAILURE, 0, |
| _("%s: closing delimiter '%c' missing"), str, delim); |
|
|
| len = closing_delim - str - 1; |
| p = new_control_record (); |
| p->argnum = argnum; |
| p->ignore = ignore; |
|
|
| p->regexpr = true; |
| p->re_compiled.buffer = nullptr; |
| p->re_compiled.allocated = 0; |
| p->re_compiled.fastmap = xmalloc (UCHAR_MAX + 1); |
| p->re_compiled.translate = nullptr; |
| re_syntax_options = |
| RE_SYNTAX_POSIX_BASIC & ~RE_CONTEXT_INVALID_DUP & ~RE_NO_EMPTY_RANGES; |
| err = re_compile_pattern (str + 1, len, &p->re_compiled); |
| if (err) |
| { |
| error (0, 0, _("%s: invalid regular expression: %s"), quote (str), err); |
| cleanup_fatal (); |
| } |
|
|
| if (closing_delim[1]) |
| check_for_offset (p, str, closing_delim + 1); |
|
|
| return p; |
| } |
|
|
| |
| |
|
|
| static void |
| parse_patterns (int argc, int start, char **argv) |
| { |
| struct control *p; |
| static intmax_t last_val = 0; |
|
|
| for (int i = start; i < argc; i++) |
| { |
| if (*argv[i] == '/' || *argv[i] == '%') |
| { |
| p = extract_regexp (i, *argv[i] == '%', argv[i]); |
| } |
| else |
| { |
| p = new_control_record (); |
| p->argnum = i; |
|
|
| uintmax_t val; |
| if (xstrtoumax (argv[i], nullptr, 10, &val, "") != LONGINT_OK |
| || INTMAX_MAX < val) |
| error (EXIT_FAILURE, 0, _("%s: invalid pattern"), quote (argv[i])); |
| if (val == 0) |
| error (EXIT_FAILURE, 0, |
| _("%s: line number must be greater than zero"), argv[i]); |
| if (val < last_val) |
| error (EXIT_FAILURE, 0, |
| _("line number %s is smaller than preceding line number," |
| " %jd"), quote (argv[i]), last_val); |
|
|
| if (val == last_val) |
| error (0, 0, |
| _("warning: line number %s is the same as preceding line number"), |
| quote (argv[i])); |
|
|
| last_val = val; |
|
|
| p->lines_required = val; |
| } |
|
|
| if (i + 1 < argc && *argv[i + 1] == '{') |
| { |
| |
| i++; |
| parse_repeat_count (i, p, argv[i]); |
| } |
| } |
| } |
|
|
|
|
|
|
| |
/* Bits describing the flags found in a suffix conversion
   specification: FLAG_THOUSANDS for the ' flag, FLAG_ALTERNATIVE for
   the # flag.  */
enum
  {
    FLAG_THOUSANDS = 1,
    FLAG_ALTERNATIVE = 2
  };
|
|
| |
| |
| static idx_t |
| get_format_flags (char const *format, int *flags_ptr) |
| { |
| int flags = 0; |
|
|
| for (idx_t count = 0; ; count++) |
| { |
| switch (format[count]) |
| { |
| case '-': |
| case '0': |
| break; |
|
|
| case '\'': |
| flags |= FLAG_THOUSANDS; |
| break; |
|
|
| case '#': |
| flags |= FLAG_ALTERNATIVE; |
| break; |
|
|
| default: |
| *flags_ptr = flags; |
| return count; |
| } |
| } |
| } |
|
|
| |
| |
| |
| static void |
| check_format_conv_type (char *format, int flags) |
| { |
| unsigned char ch = *format; |
| int compatible_flags = FLAG_THOUSANDS; |
|
|
| switch (ch) |
| { |
| case 'd': |
| case 'i': |
| break; |
|
|
| case 'u': |
| *format = 'd'; |
| break; |
|
|
| case 'o': |
| case 'x': |
| case 'X': |
| compatible_flags = FLAG_ALTERNATIVE; |
| break; |
|
|
| case 0: |
| error (EXIT_FAILURE, 0, _("missing conversion specifier in suffix")); |
|
|
| default: |
| if (isprint (ch)) |
| error (EXIT_FAILURE, 0, |
| _("invalid conversion specifier in suffix: %c"), ch); |
| else |
| error (EXIT_FAILURE, 0, |
| _("invalid conversion specifier in suffix: \\%.3o"), ch); |
| } |
|
|
| if (flags & ~ compatible_flags) |
| error (EXIT_FAILURE, 0, |
| _("invalid flags in conversion specification: %%%c%c"), |
| (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch); |
| } |
|
|
| |
| |
| |
| static idx_t |
| max_out (char *format) |
| { |
| bool percent = false; |
|
|
| for (char *f = format; *f; f++) |
| if (*f == '%' && *++f != '%') |
| { |
| if (percent) |
| error (EXIT_FAILURE, 0, |
| _("too many %% conversion specifications in suffix")); |
| percent = true; |
| int flags; |
| f += get_format_flags (f, &flags); |
| while (c_isdigit (*f)) |
| f++; |
| if (*f == '.') |
| while (c_isdigit (*++f)) |
| continue; |
| check_format_conv_type (f, flags); |
| } |
|
|
| if (! percent) |
| error (EXIT_FAILURE, 0, |
| _("missing %% conversion specification in suffix")); |
|
|
| int maxlen = snprintf (nullptr, 0, format, INT_MAX); |
| if (! (0 <= maxlen && maxlen <= IDX_MAX)) |
| xalloc_die (); |
| return maxlen; |
| } |
|
|
| int |
| main (int argc, char **argv) |
| { |
| int optc; |
|
|
| initialize_main (&argc, &argv); |
| set_program_name (argv[0]); |
| setlocale (LC_ALL, ""); |
| bindtextdomain (PACKAGE, LOCALEDIR); |
| textdomain (PACKAGE); |
|
|
| atexit (close_stdout); |
|
|
| global_argv = argv; |
| controls = nullptr; |
| control_used = 0; |
| suppress_count = false; |
| remove_files = true; |
| suppress_matched = false; |
| prefix = DEFAULT_PREFIX; |
|
|
| while ((optc = getopt_long (argc, argv, "f:b:kn:sqz", longopts, nullptr)) |
| != -1) |
| switch (optc) |
| { |
| case 'f': |
| prefix = optarg; |
| break; |
|
|
| case 'b': |
| suffix = optarg; |
| break; |
|
|
| case 'k': |
| remove_files = false; |
| break; |
|
|
| case 'n': |
| digits = xdectoimax (optarg, 0, MIN (INT_MAX, IDX_MAX), "", |
| _("invalid number"), 0); |
| break; |
|
|
| case 's': |
| case 'q': |
| suppress_count = true; |
| break; |
|
|
| case 'z': |
| elide_empty_files = true; |
| break; |
|
|
| case SUPPRESS_MATCHED_OPTION: |
| suppress_matched = true; |
| break; |
|
|
| case_GETOPT_HELP_CHAR; |
|
|
| case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
|
|
| default: |
| usage (EXIT_FAILURE); |
| } |
|
|
| if (argc - optind < 2) |
| { |
| if (argc <= optind) |
| error (0, 0, _("missing operand")); |
| else |
| error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); |
| usage (EXIT_FAILURE); |
| } |
|
|
| idx_t prefix_len = strlen (prefix); |
| idx_t max_digit_string_len |
| = (suffix |
| ? max_out (suffix) |
| : MAX (INT_STRLEN_BOUND (int), digits)); |
| idx_t filename_size; |
| if (ckd_add (&filename_size, prefix_len, max_digit_string_len + 1)) |
| xalloc_die (); |
| filename_space = ximalloc (filename_size); |
|
|
| set_input_file (argv[optind++]); |
|
|
| parse_patterns (argc, optind, argv); |
|
|
| { |
| int i; |
| static int const sig[] = |
| { |
| |
| SIGALRM, SIGHUP, SIGINT, SIGPIPE, SIGQUIT, SIGTERM, |
| #ifdef SIGPOLL |
| SIGPOLL, |
| #endif |
| #ifdef SIGPROF |
| SIGPROF, |
| #endif |
| #ifdef SIGVTALRM |
| SIGVTALRM, |
| #endif |
| #ifdef SIGXCPU |
| SIGXCPU, |
| #endif |
| #ifdef SIGXFSZ |
| SIGXFSZ, |
| #endif |
| }; |
| enum { nsigs = countof (sig) }; |
|
|
| struct sigaction act; |
|
|
| sigemptyset (&caught_signals); |
| for (i = 0; i < nsigs; i++) |
| { |
| sigaction (sig[i], nullptr, &act); |
| if (act.sa_handler != SIG_IGN) |
| sigaddset (&caught_signals, sig[i]); |
| } |
|
|
| act.sa_handler = interrupt_handler; |
| act.sa_mask = caught_signals; |
| act.sa_flags = 0; |
|
|
| for (i = 0; i < nsigs; i++) |
| if (sigismember (&caught_signals, sig[i])) |
| sigaction (sig[i], &act, nullptr); |
| } |
|
|
| split_file (); |
|
|
| if (close (STDIN_FILENO) != 0) |
| { |
| error (0, errno, _("read error")); |
| cleanup_fatal (); |
| } |
|
|
| return EXIT_SUCCESS; |
| } |
|
|
| void |
| usage (int status) |
| { |
| if (status != EXIT_SUCCESS) |
| emit_try_help (); |
| else |
| { |
| printf (_("\ |
| Usage: %s [OPTION]... FILE PATTERN...\n\ |
| "), |
| program_name); |
| fputs (_("\ |
| Output pieces of FILE separated by PATTERN(s) to files 'xx00', 'xx01', ...,\n\ |
| and output byte counts of each piece to standard output.\n\ |
| "), stdout); |
| fputs (_("\ |
| \n\ |
| Read standard input if FILE is -\n\ |
| "), stdout); |
|
|
| emit_mandatory_arg_note (); |
|
|
| fputs (_("\ |
| -b, --suffix-format=FORMAT use sprintf FORMAT instead of %02d\n\ |
| -f, --prefix=PREFIX use PREFIX instead of 'xx'\n\ |
| -k, --keep-files do not remove output files on errors\n\ |
| "), stdout); |
| fputs (_("\ |
| --suppress-matched suppress the lines matching PATTERN\n\ |
| "), stdout); |
| fputs (_("\ |
| -n, --digits=DIGITS use specified number of digits instead of 2\n\ |
| -s, --quiet, --silent do not print counts of output file sizes\n\ |
| -z, --elide-empty-files suppress empty output files\n\ |
| "), stdout); |
| fputs (HELP_OPTION_DESCRIPTION, stdout); |
| fputs (VERSION_OPTION_DESCRIPTION, stdout); |
| fputs (_("\ |
| \n\ |
| Each PATTERN may be:\n\ |
| INTEGER copy up to but not including specified line number\n\ |
| /REGEXP/[OFFSET] copy up to but not including a matching line\n\ |
| %REGEXP%[OFFSET] skip to, but not including a matching line\n\ |
| {INTEGER} repeat the previous pattern specified number of times\n\ |
| {*} repeat the previous pattern as many times as possible\n\ |
| \n\ |
| A line OFFSET is an integer optionally preceded by '+' or '-'\n\ |
| "), stdout); |
| emit_ancillary_info (PROGRAM_NAME); |
| } |
| exit (status); |
| } |
|
|