2

I need to write a program that can remove comments from a file. Removing standard comments is quite simple. However, I haven't been able to come up with a way to remove a "hyphenated" comment. For example, the first two lines of code are comments. My program does not understand this and removes only the first line of the comment.

// Function for output to console\
    ns2

#define _CRT_SECURE_NO_WARNINGS 
#include <stdio.h>
#include <stdlib.h>
#include <iso646.h>

/*
 * Asker's attempt at a comment stripper: copies "inp.c" to "out.c",
 * trying to drop line comments and block comments while copying string
 * literals through unchanged.
 *
 * NOTE(review): this is the code the question is about; it is kept as
 * posted. The inline notes mark the problems visible in the code.
 */
int main() {
    FILE *in = fopen("inp.c", "r");
    FILE *out = fopen("out.c", "w");
    /* NOTE(review): fgetc returns int; storing it in a char makes the
       comparison with EOF unreliable. */
    char ch;
    while ((ch = fgetc(in)) not_eq EOF) {
        if (ch == '/') {
            /* flag_1 becomes 1 once the slash turns out to start a comment. */
            int flag_1 = 0;
            ch = fgetc(in);
            if (ch == '/') {
                flag_1 = 1;
                /* NOTE(review): the loop condition reads one character and the
                   body reads another, so only every second character is
                   compared with the newline. A backslash ends the comment
                   here instead of continuing it onto the next line, and
                   there is no end-of-file check. */
                while ((ch = fgetc(in)) not_eq '\n') {
                    ch = fgetc(in);
                    if (ch == '\\') {
                        ch = '\n'; 
                        break;
                    }
                }
                /* The comment is replaced by a single newline, written by the
                   final fputc of the outer loop. */
                ch = '\n';
            }
            else if (ch == '*') {
                flag_1 = 1;
                /* NOTE(review): up to three characters are consumed per
                   iteration (one in the loop condition, up to two in the
                   if), so the closing star-slash pair can be missed. */
                while ((ch = fgetc(in)) not_eq EOF) {
                    if ((ch = fgetc(in)) == '*' and (ch = fgetc(in)) == '/') {
                        ch = '\n';
                        break;
                    }
                }
            }
            /* Not a comment: emit the slash; ch now holds the character that
               followed it and is processed below. */
            if (flag_1 == 0)
                fputc('/', out);
        }
        if (ch == '"') {
            fputc(ch, out);
            /* NOTE(review): copies up to the next double quote; escaped quotes
               and end of file are not handled. */
            while ((ch = fgetc(in)) not_eq '"') {
                fputc(ch, out);
            }
        }
        fputc(ch, out);
    }
    fclose(in);
    fclose(out);
    return 0;
}

I have been trying to extend and change the program for more than an hour, but as a beginner I do not know enough to solve this task. I would be grateful for your advice!

chqrlie
  • 114,102
  • 10
  • 108
  • 170
Ossowitz
  • 43
  • 5
  • This has [been asked](https://stackoverflow.com/questions/36454069/how-to-remove-c-style-comments-from-code), and perhaps may answer your question. ([and here](https://stackoverflow.com/questions/14975737/regular-expression-to-remove-comment)) – ryyker Jan 25 '22 at 15:55
  • 3
    Handling C comments thoroughly is hard. Note that `printf("// not a comment\n");` does not contain a comment. And `printf("/* not the start of a comment\n");` doesn't contain the start of a comment. (For the detail-oriented, you can write `int j = '//';` and that is not the start of a comment; ditto `int k = '/*';`. Multi-character constants are permitted but the result is implementation-defined.) To fix your code, you need to spot a backslash followed by a newline and continue the single-line comment if you encounter that. – Jonathan Leffler Jan 25 '22 at 16:01
  • The ```\``` when at the end of a line in `C` is used as a continuation of the previous line. You would have to incorporate that symbol into one of the regular expressions linked in the 1st comment . – ryyker Jan 25 '22 at 16:04
  • 2
    And, related to my previous comment, you do attempt to handle quoted strings, but `printf("This \" is mishandled /* and this does not start a comment\n");`. Backslashes make life complex. Don't ask about C++ and raw string literals, or punctuation in numbers (`0b0101'1100` is a valid binary literal in C++; it is not valid in C). – Jonathan Leffler Jan 25 '22 at 16:14
  • This `char ch = '"';` isn't the beginning of a string literal (which might contain a non-comment) either, and so on. – Weather Vane Jan 25 '22 at 16:25

1 Answer

2

In order to ignore the escaped newlines, sequences of \ followed by a newline, you could use a function that handles this transparently.

Note also these issues:

  • ch must be defined as an int to handle EOF correctly.
  • the macros defined in <iso646.h> make the code less readable.
  • \ should be handled when parsing strings.
  • character constants should be parsed too: '//' is a valid character constant, not a comment.
// Function for output to console\
    ns2
/\
*\ This is a valid comment too :) 
*\
/

#define _CRT_SECURE_NO_WARNINGS 
#include <stdio.h>

/*
 * Read the next character from `in`, skipping line splices.
 *
 * A backslash immediately followed by a newline is dropped and reading
 * resumes with the character after the newline. A backslash followed by
 * anything else is returned as-is; the character after it is pushed back
 * so the next call sees it.
 *
 * Returns the character read, or EOF at end of input.
 */
int mygetc(FILE *in) {
    int ch = getc(in);

    while (ch == '\\') {
        int next = getc(in);

        if (next != '\n') {
            /* Ordinary backslash: give back the look-ahead (EOF cannot be
               pushed back) and hand the backslash to the caller. */
            if (next != EOF)
                ungetc(next, in);
            break;
        }
        /* Backslash-newline pair consumed; carry on with what follows. */
        ch = getc(in);
    }
    return ch;
}

/*
 * Consume the remainder of a line comment from `in`.
 *
 * Characters are read through mygetc() until a newline or end of input
 * is reached.
 *
 * Returns '\n' when the comment ended at a newline, or EOF when the input
 * ended first.
 */
int skip_line_comment(FILE *in) {
    for (;;) {
        int ch = mygetc(in);

        if (ch == '\n' || ch == EOF)
            return ch;
    }
}

/*
 * Consume the remainder of a block comment from `in`.
 *
 * Characters are read through mygetc() until a star immediately followed
 * by a slash has been seen. Runs of several stars before the slash are
 * accepted as well.
 *
 * Returns ' ' once the terminator has been consumed, or EOF if the input
 * ends inside the comment.
 */
int skip_block_comment(FILE *in) {
    int after_star = 0;     /* non-zero when the previous character was '*' */

    for (;;) {
        int ch = mygetc(in);

        if (ch == EOF)
            return EOF;
        if (after_star && ch == '/')
            return ' ';
        after_star = (ch == '*');
    }
}

/*
 * Copy "inp.c" to "out.c" with comments removed.
 *
 * Input is read through mygetc(), so backslash-newline pairs are removed
 * before any other processing. A line comment is replaced by the newline
 * that ends it and a block comment by a single space. String literals and
 * character constants are copied through unchanged, including backslash
 * escapes, so comment-like text inside them is left alone.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output
 * cannot be closed cleanly.
 */
int main() {
    FILE *in = fopen("inp.c", "r");
    if (in == NULL) {
        perror("inp.c");
        return 1;
    }
    FILE *out = fopen("out.c", "w");
    if (out == NULL) {
        perror("out.c");
        fclose(in);
        return 1;
    }

    int ch;
    while ((ch = mygetc(in)) != EOF) {
        if (ch == '/') {
            // A slash only starts a comment when followed by '/' or '*';
            // look at the next character before deciding.
            ch = mygetc(in);
            if (ch == '/') {
                ch = skip_line_comment(in);
            } else if (ch == '*') {
                ch = skip_block_comment(in);
            } else {
                // Plain slash (e.g. division): emit it and fall through so
                // the character that followed it is handled normally.
                fputc('/', out);
            }
        }
        if (ch == '"' || ch == '\'') {
            int sep = ch;
            fputc(ch, out);
            while ((ch = mygetc(in)) != sep && ch != EOF) {
                fputc(ch, out);
                if (ch == '\\') {
                    // Copy the escaped character blindly so that an escaped
                    // quote does not end the literal.
                    ch = mygetc(in);
                    if (ch == EOF)
                        break;
                    fputc(ch, out);
                }
            }
            // ch is now the closing quote (written below) or EOF.
        }
        if (ch == EOF)
            break;
        fputc(ch, out);
    }

    fclose(in);
    // fclose flushes buffered output, so a failure here means data was lost.
    if (fclose(out) != 0) {
        perror("out.c");
        return 1;
    }
    return 0;
}
chqrlie
  • 114,102
  • 10
  • 108
  • 170