diff --git a/flang/preprocessor.cc b/flang/preprocessor.cc
new file mode 100644
index 000000000000..c2dbeaf1f27c
--- /dev/null
+++ b/flang/preprocessor.cc
@@ -0,0 +1,421 @@
+#include "preprocessor.h"
+#include "char-buffer.h"
+#include "idioms.h"
+#include "prescan.h"
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+
+namespace Fortran {
+
+// Appends all of that's tokens to this sequence.  Any characters added since
+// the last EndToken() are first closed off as a token of their own.
+void TokenSequence::Append(const TokenSequence &that) {
+  if (char_.size() > nextStart_) { start_.push_back(nextStart_); }
+  const int shift = char_.size();  // rebases that's offsets into char_
+  for (size_t n{0}; n < that.start_.size(); ++n) {
+    start_.push_back(that.start_[n] + shift);
+  }
+  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
+  nextStart_ = char_.size();
+}
+
+// Emits the buffered characters of all tokens to *out; only char_ is passed,
+// so token boundaries are not part of what is written.
+void TokenSequence::Emit(CharBuffer *out) { out->Put(this->char_); }
+
+// Object-like macro: the body is tokens [firstToken, firstToken + tokens).
+Definition::Definition(
+    const TokenSequence &repl, size_t firstToken, size_t tokens)
+  : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
+
+Definition::Definition(const std::vector<std::string> &argNames,
+    const TokenSequence &repl, size_t firstToken, size_t tokens)
+  : isFunctionLike_{true}, argumentCount_(argNames.size()),  // function-like
+    replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
+
+// Sets the disabled flag to "disable" and returns the flag's previous value.
+bool Definition::set_isDisabled(bool disable) {
+  const bool previous{std::exchange(isDisabled_, disable)};
+  return previous;
+}
+
+// Builds a macro's stored replacement list from tokens [firstToken,
+// firstToken + tokens) of "token".  Argument names become two-character
+// markers ("~A", "~B", ...) for Apply(); empty tokens are dropped; "##" is
+// removed along with the blank tokens around it.
+TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
+                                   const TokenSequence &token,
+                                   size_t firstToken, size_t tokens) {
+  std::map<std::string, std::string> args;
+  char argIndex{'A'};
+  for (const std::string &arg : argNames) {
+    CHECK(args.find(arg) == args.end());
+    args[arg] = "~"s + argIndex++;
+  }
+  TokenSequence result;
+  bool pasting{false};  // true from a "##" until the next non-blank token
+  for (size_t j{0}; j < tokens; ++j) {
+    size_t bytes{token.GetBytes(firstToken + j)};
+    if (bytes == 0) { continue; }
+    const char *text{token.GetText(firstToken + j)};
+    if (bytes == 2 && text[0] == '#' && text[1] == '#') {
+      // Drop the blanks already emitted before the "##".
+      for (size_t n{result.size()}; n > 0 && (result.GetBytes(n - 1) == 0 ||
+             *result.GetText(n - 1) == ' '); --n) {
+        result.pop_back();
+      }
+      pasting = true;
+      continue;
+    }
+    if (*text == ' ') {
+      if (pasting) { continue; }
+    } else {
+      pasting = false;
+      // <cctype> functions are undefined for negative char values: cast.
+      if (*text == '_' || isalpha(static_cast<unsigned char>(*text))) {
+        auto it = args.find(token.GetString(firstToken + j));
+        if (it != args.end()) {
+          result.push_back(it->second);
+          continue;
+        }
+      }
+    }
+    result.push_back(text, bytes);
+  }
+  return result;
+}
+
+// Expands the replacement list with the actual arguments.  Each "~X" marker
+// becomes the tokens of args[X - 'A'], or, when directly preceded by a "#"
+// token, one string literal (with '"' and '\' escaped) that replaces the "#".
+TokenSequence Definition::Apply(const std::vector<TokenSequence> &args) {
+  TokenSequence result;
+  bool stringify{false};  // the previous replacement token was a lone '#'
+  size_t tokens{replacement_.size()};
+  for (size_t j{0}; j < tokens; ++j) {
+    size_t bytes{replacement_.GetBytes(j)};
+    const char *text{replacement_.GetText(j)};
+    if (bytes != 2 || *text != '~') {
+      stringify = bytes == 1 && *text == '#';
+      result.push_back(text, bytes);
+      continue;
+    }
+    size_t index = text[1] - 'A';
+    if (index >= args.size()) { continue; }
+    const TokenSequence &actual = args[index];
+    if (stringify) {
+      std::string strung{'"'};
+      for (size_t k{0}; k < actual.size(); ++k) {
+        size_t argBytes{actual.GetBytes(k)};
+        const char *arg{actual.GetText(k)};
+        for (size_t n{0}; n < argBytes; ++n) {
+          if (arg[n] == '"' || arg[n] == '\\') { strung += '\\'; }
+          strung += arg[n];
+        }
+      }
+      strung += '"';
+      result.pop_back();  // remove the '#'
+      result.push_back(strung);
+      // The '#' is consumed: a directly following marker ("#a ## b") is plain.
+      stringify = false;
+    } else {
+      for (size_t k{0}; k < actual.size(); ++k) {
+        result.push_back(actual.GetText(k), actual.GetBytes(k));
+      }
+    }
+  }
+  return result;
+}
+
+// Performs macro replacement on "input".  Returns false and leaves *result
+// alone when no token names a defined macro; otherwise appends the input,
+// with macro uses expanded and rescanned, to *result and returns true.
+bool Preprocessor::MacroReplacement(const TokenSequence &input,
+                                    TokenSequence *result) {
+  // Do quick scan for any use of a defined name.
+  if (definitions_.empty()) { return false; }
+  size_t tokens{input.size()};
+  size_t j;
+  for (j = 0; j < tokens; ++j) {
+    const char *text{input.GetText(j)};
+    size_t bytes{input.GetBytes(j)};
+    if (bytes > 0 &&
+        (*text == '_' || isalpha(static_cast<unsigned char>(*text))) &&
+        definitions_.find(CharPointerWithLength{text, bytes}) !=
+          definitions_.end()) {
+      break;
+    }
+  }
+  if (j == tokens) { return false; }  // nothing that could be replaced
+
+  for (size_t k{0}; k < j; ++k) {
+    result->push_back(input.GetToken(k));
+  }
+  for (; j < tokens; ++j) {
+    size_t bytes{input.GetBytes(j)};
+    const char *text{input.GetText(j)};
+    if (bytes == 0 ||
+        (*text != '_' && !isalpha(static_cast<unsigned char>(*text)))) {
+      result->push_back(text, bytes);
+      continue;
+    }
+    auto it = definitions_.find(CharPointerWithLength{text, bytes});
+    if (it == definitions_.end() || it->second.isDisabled()) {
+      result->push_back(text, bytes);
+      continue;
+    }
+    Definition &def{it->second};
+    if (!def.isFunctionLike()) {
+      def.set_isDisabled(true);
+      TokenSequence repl;
+      result->Append(MacroReplacement(def.replacement(), &repl) ? repl
+                       : def.replacement());
+      def.set_isDisabled(false);
+      continue;
+    }
+    // Possible function-like macro call.  Skip spaces and newlines to see
+    // whether '(' is next.
+    size_t k{j};
+    bool leftParen{false};
+    while (++k < tokens) {
+      size_t bytes{input.GetBytes(k)};
+      const char *text{input.GetText(k)};
+      if (bytes > 0 && *text != ' ' && *text != '\n') {
+        leftParen = bytes == 1 && *text == '(';
+        break;
+      }
+    }
+    if (!leftParen) {
+      result->push_back(text, bytes);
+      continue;
+    }
+    std::vector<size_t> argStart{++k};
+    for (int nesting{0}; k < tokens; ++k) {
+      size_t bytes{input.GetBytes(k)};
+      const char *text{input.GetText(k)};
+      if (bytes == 1 && *text == '(') {
+        ++nesting;
+      } else if (bytes == 1 && *text == ')') {
+        if (nesting == 0) {
+          break;
+        }
+        --nesting;
+      } else if (bytes == 1 && *text == ',' && nesting == 0) {
+        argStart.push_back(k + 1);
+      }
+    }
+    // For a macro defined with no arguments, "()" holding at most blanks is
+    // zero arguments rather than one empty argument.
+    if (k < tokens && def.argumentCount() == 0 && argStart.size() == 1) {
+      size_t at{argStart[0]};
+      while (at < k && (input.GetBytes(at) == 0 || *input.GetText(at) == ' ')) {
+        ++at;
+      }
+      if (at == k) { argStart.clear(); }
+    }
+    if (k >= tokens || argStart.size() != def.argumentCount()) {
+      result->push_back(text, bytes);
+      continue;
+    }
+    j = k;  // advance to the terminal ')'
+    std::vector<TokenSequence> args;
+    for (k = 0; k < argStart.size(); ++k) {
+      size_t at{argStart[k]};
+      size_t count{(k + 1 == argStart.size() ? j : argStart[k+1] - 1) - at};
+      TokenSequence raw;
+      for (; count-- > 0; ++at) {
+        raw.push_back(input.GetText(at), input.GetBytes(at));
+      }
+      TokenSequence arg;  // receives the argument if it needed expansion
+      args.emplace_back(std::move(MacroReplacement(raw, &arg) ? arg : raw));
+    }
+    TokenSequence repl{def.Apply(args)};
+    def.set_isDisabled(true);
+    TokenSequence rescanned;
+    result->Append(MacroReplacement(repl, &rescanned) ? rescanned : repl);
+    def.set_isDisabled(false);
+  }
+  return true;
+}
+
+// Advances "at" past tokens that are empty or begin with a space.
+static size_t SkipBlanks(const TokenSequence &token, size_t at) {
+  while (at < token.size() &&
+         (token.GetBytes(at) == 0 || *token.GetText(at) == ' ')) {
+    ++at;
+  }
+  return at;  // token.size() (or the original "at" if beyond it) when none
+}
+
+// Returns the token following the leading '#' of "line", or "" if none.
+static std::string GetDirectiveName(const TokenSequence &line) {
+  const size_t count{line.size()};
+  const size_t sharp{SkipBlanks(line, 0)};
+  if (sharp < count && line.GetString(sharp) == "#") {
+    const size_t name{SkipBlanks(line, sharp + 1)};
+    if (name < count) {
+      return line.GetString(name);
+    }
+  }
+  return ""s;
+}
+
+// Implements the preprocessing directive in "dir"; returns an error message,
+// or an empty string when the directive succeeded or was ignored.
+std::string Preprocessor::Directive(const TokenSequence &dir) {
+  size_t tokens{dir.size()};
+  size_t j{SkipBlanks(dir, 0)};
+  if (j == tokens) { return ""s; }
+  if (dir.GetString(j) != "#") { return "missing '#'"s; }
+  j = SkipBlanks(dir, j + 1);
+  if (j == tokens) { return ""s; }
+  if (isdigit(static_cast<unsigned char>(*dir.GetText(j))) ||
+      *dir.GetText(j) == '"') {
+    return ""s;  // TODO: treat as #line
+  }
+  std::string dirName{dir.GetString(j)};
+  j = SkipBlanks(dir, j + 1);
+  std::string nameString;
+  CharPointerWithLength nameToken;
+  if (j < tokens && (*dir.GetText(j) == '_' ||
+                     isalpha(static_cast<unsigned char>(*dir.GetText(j))))) {
+    nameString = dir.GetString(j);
+    nameToken = dir.GetToken(j);
+  }
+  if (dirName == "define") {
+    if (nameToken.empty()) { return "#define: missing or invalid name"s; }
+    // Get a pointer to a "permanent" copy of the name for use as the
+    // key in the definitions_ map.
+    names_.push_back(nameString);
+    nameToken = CharPointerWithLength{names_.back().data(),
+                                      names_.back().size()};
+    definitions_.erase(nameToken);
+    if (++j < tokens && dir.GetBytes(j) == 1 && *dir.GetText(j) == '(') {
+      j = SkipBlanks(dir, j + 1);
+      if (j == tokens) {  // nothing follows the '('
+        return "#define: malformed argument list"s;
+      }
+      std::vector<std::string> argName;
+      if (dir.GetString(j) != ")") {
+        while (true) {
+          std::string an{dir.GetString(j)};
+          if (an.empty() || (an[0] != '_' &&
+                             !isalpha(static_cast<unsigned char>(an[0])))) {
+            return "#define: missing or invalid argument name"s;
+          }
+          argName.push_back(an);
+          j = SkipBlanks(dir, j + 1);
+          if (j == tokens) {
+            return "#define: malformed argument list"s;
+          }
+          std::string punc{dir.GetString(j)};
+          if (punc == ")") { break; }
+          if (punc != ",") {
+            return "#define: malformed argument list"s;
+          }
+          j = SkipBlanks(dir, j + 1);
+          if (j == tokens) {
+            return "#define: malformed argument list"s;
+          }
+        }
+        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
+            argName.size()) {
+          return "#define: argument names are not distinct"s;
+        }
+      }
+      j = SkipBlanks(dir, j + 1);
+      definitions_.emplace(
+        std::make_pair(nameToken, Definition{argName, dir, j, tokens - j}));
+    } else {
+      // j already indexes the token after the name (or equals tokens), so
+      // skip from j itself; starting at j + 1 could step past the end.
+      j = SkipBlanks(dir, j);
+      definitions_.emplace(
+        std::make_pair(nameToken, Definition{dir, j, tokens - j}));
+    }
+    return ""s;
+  }
+  if (dirName == "undef") {
+    if (nameToken.empty()) {
+      return "#undef: missing or invalid name"s;
+    }
+    j = SkipBlanks(dir, j + 1);
+    if (j != tokens) {
+      return "#undef: excess tokens at end of directive"s;
+    }
+    definitions_.erase(nameToken);
+    return ""s;
+  }
+  if (dirName == "ifdef" || dirName == "ifndef") {
+    if (nameToken.empty()) {
+      return "#"s + dirName + ": missing name";
+    }
+    j = SkipBlanks(dir, j + 1);
+    if (j != tokens) {
+      return "#"s + dirName + ": excess tokens at end of directive";
+    }
+    auto it = definitions_.find(nameToken);
+    if ((it != definitions_.end()) == (dirName == "ifdef")) {
+      ifStack_.push(true);  // #else / #elif allowed
+      return {};
+    }
+    int nesting{0};
+    while (std::optional<TokenSequence>
+             line{prescanner_->NextTokenizedLine()}) {
+      std::string dn{GetDirectiveName(*line)};
+      if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
+        ++nesting;
+      } else if (dn == "endif") {
+        if (nesting-- == 0) {
+          return ""s;
+        }
+      } else if (dn == "else" && nesting == 0) {
+        ifStack_.push(false);
+        return ""s;
+      } // TODO: #elif
+    }
+    return "#"s + dirName + ": missing #endif";
+  }
+  if (dirName == "else") {
+    j = SkipBlanks(dir, j);
+    if (j != tokens) {
+      return "#else: excess tokens at end of directive"s;
+    }
+    if (ifStack_.empty()) {
+      return "#else: no #if, #ifdef, or #ifndef"s;
+    }
+    if (!ifStack_.top()) {
+      return "#else: already appeared in this #if, #ifdef, or #ifndef"s;
+    }
+    ifStack_.pop();
+    int nesting{0};
+    while (std::optional<TokenSequence>
+             line{prescanner_->NextTokenizedLine()}) {
+      std::string dn{GetDirectiveName(*line)};
+      if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
+        ++nesting;
+      } else if (dn == "endif") {
+        if (nesting-- == 0) {
+          return ""s;
+        }
+      }
+    }
+    return "#else: missing #endif"s;
+  }
+  // TODO: #if, #elif with macro replacement on expressions
+  if (dirName == "endif") {
+    j = SkipBlanks(dir, j);
+    if (j != tokens) {
+      return "#endif: excess tokens at end of directive"s;
+    }
+    if (ifStack_.empty()) {
+      return "#endif: no #if, #ifdef, or #ifndef"s;
+    }
+    ifStack_.pop();
+    return ""s;
+  }
+  return "#"s + dirName + ": unknown or unimplemented directive";
+}
+}  // namespace Fortran
diff --git a/flang/preprocessor.h b/flang/preprocessor.h
new file mode 100644
index 000000000000..eda9839fec3e
--- /dev/null
+++ b/flang/preprocessor.h
@@ -0,0 +1,213 @@
+#ifndef FORTRAN_PREPROCESSOR_H_
+#define FORTRAN_PREPROCESSOR_H_
+
+// A Fortran-aware preprocessing module used by the prescanner to implement
+// preprocessing directives and macro replacement.  Intended to be efficient
+// enough to always run on all source files even when no preprocessing is
+// needed, so that special compiler command options and/or source file name
+// extensions for preprocessing will not be necessary.
+
+#include "idioms.h"
+#include <cctype>
+#include <cstring>
+#include <functional>
+#include <list>
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace Fortran {
+
+class CharBuffer;
+class Prescanner;
+
+// A const char pointer with an associated length.  It does not own the
+// referenced data, so the characters must stay valid while the view is in
+// use.  Used to describe buffered tokens and as hash table keys.
+struct CharPointerWithLength {
+  // The default-constructed value is empty and holds a null pointer.
+  CharPointerWithLength() = default;
+  CharPointerWithLength(const char *x, size_t n) : data{x}, bytes{n} {}
+  // Copies share the same characters; only the pointer and length are copied.
+  CharPointerWithLength(const CharPointerWithLength &) = default;
+  CharPointerWithLength &operator=(const CharPointerWithLength &) = default;
+
+  bool empty() const { return size() == 0; }
+  size_t size() const { return bytes; }
+  // Unchecked access to the j'th character.
+  const char &operator[](size_t j) const { return *(data + j); }
+
+  // Start of the characters (not necessarily NUL-terminated) and their count.
+  const char *data{nullptr};
+  size_t bytes{0};
+};
+}  // namespace Fortran
+
+// Specializations to enable std::unordered_map<CharPointerWithLength, ...>
+// Hashes the referenced bytes, not the pointer: hash = (hash * 31) ^ byte.
+template<> struct std::hash<Fortran::CharPointerWithLength> {
+  size_t operator()(const Fortran::CharPointerWithLength &x) const {
+    size_t result{0};
+    for (size_t at{0}; at < x.bytes; ++at) {
+      result = (result * 31) ^ x.data[at];
+    }
+    return result;
+  }
+};
+
+// Two views are equal when they have the same length and the same bytes.
+template<> struct std::equal_to<Fortran::CharPointerWithLength> {
+  bool operator()(const Fortran::CharPointerWithLength &x,
+                  const Fortran::CharPointerWithLength &y) const {
+    // memcmp must not be given the null data of an empty view, even for 0.
+    return x.bytes == y.bytes &&
+           (x.bytes == 0 || std::memcmp(x.data, y.data, x.bytes) == 0);
+  }
+};
+
+namespace Fortran {
+
+// Buffers a contiguous sequence of characters that has been partitioned into
+// a sequence of preprocessing tokens.  Token j occupies the characters from
+// start_[j] up to the start of token j+1 (or up to nextStart_ for the last
+// token); characters added since the last EndToken() belong to no token yet.
+class TokenSequence {
+ public:
+  TokenSequence() {}
+  // Moving leaves the source empty and reusable.
+  TokenSequence(TokenSequence &&that) noexcept
+    : start_{std::move(that.start_)}, nextStart_{that.nextStart_},
+      char_{std::move(that.char_)} {
+    that.clear();
+  }
+  TokenSequence &operator=(TokenSequence &&that) noexcept {
+    if (this != &that) {
+      start_ = std::move(that.start_);
+      nextStart_ = that.nextStart_;
+      char_ = std::move(that.char_);
+      that.clear();
+    }
+    return *this;
+  }
+
+  // Length in bytes of a completed token.  The last token ends at nextStart_
+  // rather than char_.size() so that characters of a token still being
+  // accumulated with AddChar() are not counted as part of it.
+  size_t GetBytes(size_t token) const {
+    return (token + 1 >= start_.size() ? nextStart_ : start_[token + 1]) -
+           start_[token];
+  }
+  // Pointer to the first character of a token; the text is not terminated.
+  const char *GetText(size_t token) const {
+    return &char_[start_[token]];
+  }
+  std::string GetString(size_t token) const {
+    return std::string(GetText(token), GetBytes(token));
+  }
+  CharPointerWithLength GetToken(size_t token) const {
+    return {GetText(token), GetBytes(token)};
+  }
+
+  // Tokens are built with any number of AddChar() calls and one EndToken().
+  void AddChar(char ch) { char_.emplace_back(ch); }
+  void EndToken() {
+    // CHECK(char_.size() > nextStart_);
+    start_.emplace_back(nextStart_);
+    nextStart_ = char_.size();
+  }
+
+  void Append(const TokenSequence &);
+
+  void Emit(CharBuffer *);
+
+  bool empty() const { return start_.empty(); }
+
+  size_t size() const { return start_.size(); }  // number of completed tokens
+
+  void clear() {
+    start_.clear();
+    nextStart_ = 0;
+    char_.clear();
+  }
+
+  // Removes the last token along with any characters added after it.
+  void pop_back() {
+    nextStart_ = start_.back();
+    start_.pop_back();
+    char_.resize(nextStart_);
+  }
+
+  // Each push_back() appends one complete token.
+  void push_back(const char *s, size_t bytes) {
+    for (size_t j{0}; j < bytes; ++j) {
+      AddChar(s[j]);
+    }
+    EndToken();
+  }
+  void push_back(const CharPointerWithLength &t) { push_back(t.data, t.bytes); }
+  void push_back(const std::string &s) { push_back(s.data(), s.size()); }
+
+  void shrink_to_fit() {
+    start_.shrink_to_fit();
+    char_.shrink_to_fit();
+  }
+
+ private:
+  std::vector<int> start_;
+  size_t nextStart_{0};
+  std::vector<char> char_;
+};
+
+// Defines a macro: its kind, argument count, and stored replacement tokens.
+class Definition {
+ public:
+  Definition(const TokenSequence &, size_t firstToken, size_t tokens);
+  Definition(const std::vector<std::string> &argNames, const TokenSequence &,
+             size_t firstToken, size_t tokens);
+  // (The constructor taking argNames defines a function-like macro.)
+  bool isFunctionLike() const { return isFunctionLike_; }
+  size_t argumentCount() const { return argumentCount_; }
+  bool isVariadic() const { return isVariadic_; }
+  bool isDisabled() const { return isDisabled_; }
+  const TokenSequence &replacement() const { return replacement_; }
+  // Sets the disabled flag to "disable" and returns its previous value.
+  bool set_isDisabled(bool disable);
+  // Expands the replacement tokens using the given actual arguments.
+  TokenSequence Apply(const std::vector<TokenSequence> &args);
+
+ private:
+  static TokenSequence Tokenize(const std::vector<std::string> &argNames,
+                                const TokenSequence &token, size_t firstToken,
+                                size_t tokens);
+  // A disabled macro is not expanded; replacement_ is Tokenize()'s result.
+  bool isFunctionLike_{false};
+  size_t argumentCount_{0};
+  bool isVariadic_{false};
+  bool isDisabled_{false};
+  TokenSequence replacement_;
+};
+
+// Preprocessing state: macro definitions and the open-conditional stack.
+class Preprocessor {
+ public:
+  Preprocessor(Prescanner *ps) : prescanner_{ps} {}
+
+  // When the input contains macros to be replaced, the new token sequence
+  // is appended to the output and the returned value is true.  When
+  // no macro replacement is necessary, the output is unmodified and the
+  // return value is false.
+  bool MacroReplacement(const TokenSequence &, TokenSequence *);
+
+  // Implements a preprocessor directive; returns an error message, or an
+  // empty string when successful.
+  std::string Directive(const TokenSequence &);
+
+ private:
+  std::list<std::string> names_;  // storage for the keys of definitions_
+  std::unordered_map<CharPointerWithLength, Definition> definitions_;
+  std::stack<bool> ifStack_;  // true: #else still allowed; false: #else seen
+  Prescanner *prescanner_;  // supplies the lines that conditionals skip
+};
+}  // namespace Fortran
+#endif  // FORTRAN_PREPROCESSOR_H_