// Copyright 2015 Matthew Holt
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package certmagic

import (
	"context"
	"crypto/tls"
	"encoding/base64"
	"fmt"
	"log"
	weakrand "math/rand"
	"net"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/go-acme/lego/v3/acme"
	"github.com/go-acme/lego/v3/certificate"
	"github.com/go-acme/lego/v3/challenge"
	"github.com/go-acme/lego/v3/lego"
	"github.com/go-acme/lego/v3/registration"
)

// init seeds the shared math/rand source from the current wall-clock
// time so that random challenge selection differs between process runs.
func init() {
	seed := time.Now().UnixNano()
	weakrand.Seed(seed)
}

// acmeClient is a wrapper over lego's acme.Client with
// some custom state attached. It is used to obtain,
// renew, and revoke certificates with ACME. Use
// ACMEManager.newACMEClient() or
// ACMEManager.newACMEClientWithRetry() to get a valid
// one for real use.
type acmeClient struct {
	// caURL is the resolved CA directory URL this client talks to
	// (after test-CA selection and scheme defaulting in newACMEClient).
	caURL      string
	// mgr is the ACMEManager that created this client; challenge
	// ports, DNS provider and account email are read from it.
	mgr        *ACMEManager
	// acmeClient is the underlying lego client. lego clients are cached
	// and reused, so this value may be shared with other acmeClients.
	acmeClient *lego.Client
	// challenges is not populated by newACMEClient.
	// NOTE(review): presumably tracks remaining challenge types during
	// an obtain/renew; confirm against the callers.
	challenges []challenge.Type
}

// newACMEClientWithRetry is the same as newACMEClient, but with
// automatic retry capabilities. Sometimes network connections or
// HTTP requests fail intermittently, even when requesting the
// directory endpoint for example, so we can avoid that by just
// retrying once. Failures here are rare and sporadic, usually,
// so a simple retry is an easy fix.
//
// The client is always created non-interactively. If the CA answers
// with HTTP 429 (Too Many Requests), no retry is attempted and an
// error is returned immediately, since retrying would only make the
// rate limiting worse. On failure the returned client is nil and the
// error is that of the last attempt.
func (am *ACMEManager) newACMEClientWithRetry(useTestCA bool) (*acmeClient, error) {
	var client *acmeClient
	var err error
	const maxTries = 2
	for i := 0; i < maxTries; i++ {
		if i > 0 {
			// brief pause before retrying so a transient failure can clear
			time.Sleep(2 * time.Second)
		}
		client, err = am.newACMEClient(useTestCA, false) // TODO: move logic that requires interactivity to way before this part of the process...
		if err == nil {
			break
		}

		// The problem document may be surfaced either by value or by
		// pointer depending on where in the ACME library it originated;
		// recognize both so the rate-limit abort is not silently skipped.
		tooManyRequests := false
		switch problem := err.(type) {
		case acme.ProblemDetails:
			tooManyRequests = problem.HTTPStatus == http.StatusTooManyRequests
		case *acme.ProblemDetails:
			tooManyRequests = problem != nil && problem.HTTPStatus == http.StatusTooManyRequests
		}
		if tooManyRequests {
			return nil, fmt.Errorf("too many requests making new ACME client: %+v - aborting", err)
		}

		log.Printf("[ERROR] Making new ACME client: %v (attempt %d/%d)", err, i+1, maxTries)
	}
	return client, err
}

// newACMEClient creates the underlying ACME library client type.
// If useTestCA is true, am.TestCA will be used if it is set;
// otherwise, the primary CA will still be used.
//
// If interactive is true and the account is not yet registered, the
// user is asked to agree to the CA's terms of service (when the CA
// publishes any and am.Agreed is not already set); declining is an
// error. If interactive is false, no prompt is shown and am.Agreed
// is passed to the CA as-is.
//
// The underlying lego client is cached per CA URL and account email
// and reused across calls. A newly registered account is persisted
// to storage before this function returns. The whole function runs
// under acmeClientsMu, so only one client is made at a time.
func (am *ACMEManager) newACMEClient(useTestCA, interactive bool) (*acmeClient, error) {
	acmeClientsMu.Lock()
	defer acmeClientsMu.Unlock()

	// ensure defaults are filled in
	certObtainTimeout := am.CertObtainTimeout
	if certObtainTimeout == 0 {
		certObtainTimeout = DefaultACME.CertObtainTimeout
	}
	var caURL string
	if useTestCA {
		caURL = am.TestCA
		// only use the default test CA if the CA is also
		// the default CA; no point in testing against
		// Let's Encrypt's staging server if we are not
		// using their production server too
		if caURL == "" && am.CA == DefaultACME.CA {
			caURL = DefaultACME.TestCA
		}
	}
	if caURL == "" {
		caURL = am.CA
	}
	if caURL == "" {
		caURL = DefaultACME.CA
	}

	// ensure endpoint is secure (assume HTTPS if scheme is missing)
	if !strings.Contains(caURL, "://") {
		caURL = "https://" + caURL
	}
	u, err := url.Parse(caURL)
	if err != nil {
		return nil, err
	}
	if u.Scheme != "https" && !isLoopback(u.Host) && !isInternal(u.Host) {
		return nil, fmt.Errorf("%s: insecure CA URL (HTTPS required)", caURL)
	}

	// look up or create the user account
	leUser, err := am.getUser(caURL, am.Email)
	if err != nil {
		return nil, err
	}

	// if a lego client with this configuration already exists, reuse it
	clientKey := caURL + leUser.Email
	client, ok := acmeClients[clientKey]
	if !ok {
		// the client facilitates our communication with the CA server
		legoCfg := lego.NewConfig(leUser)
		legoCfg.CADirURL = caURL
		legoCfg.UserAgent = buildUAString()
		legoCfg.HTTPClient.Timeout = HTTPTimeout
		legoCfg.Certificate = lego.CertificateConfig{
			// use the defaulted value computed above, not the raw
			// (possibly zero) field from the manager
			Timeout: certObtainTimeout,
		}
		if am.TrustedRoots != nil {
			if ht, ok := legoCfg.HTTPClient.Transport.(*http.Transport); ok {
				if ht.TLSClientConfig == nil {
					ht.TLSClientConfig = new(tls.Config)
					// setting a custom TLS config disables automatic
					// HTTP/2, so ask for it explicitly
					ht.ForceAttemptHTTP2 = true
				}
				ht.TLSClientConfig.RootCAs = am.TrustedRoots
			}
		}
		client, err = lego.NewClient(legoCfg)
		if err != nil {
			return nil, err
		}
		acmeClients[clientKey] = client
	}

	// if not registered, the user must register an account
	// with the CA and agree to terms
	if leUser.Registration == nil {
		if interactive { // can't prompt a user who isn't there
			termsURL := client.GetToSURL()
			if !am.Agreed && termsURL != "" {
				am.Agreed = am.askUserAgreement(termsURL)
			}
			if !am.Agreed && termsURL != "" {
				return nil, fmt.Errorf("user must agree to CA terms")
			}
		}

		var reg *registration.Resource
		if am.ExternalAccount != nil {
			reg, err = client.Registration.RegisterWithExternalAccountBinding(registration.RegisterEABOptions{
				TermsOfServiceAgreed: am.Agreed,
				Kid:                  am.ExternalAccount.KeyID,
				HmacEncoded:          base64.StdEncoding.EncodeToString(am.ExternalAccount.HMAC),
			})
		} else {
			reg, err = client.Registration.Register(registration.RegisterOptions{
				TermsOfServiceAgreed: am.Agreed,
			})
		}
		if err != nil {
			return nil, err
		}
		leUser.Registration = reg

		// persist the user to storage
		err = am.saveUser(caURL, leUser)
		if err != nil {
			return nil, fmt.Errorf("could not save user: %w", err)
		}
	}

	c := &acmeClient{
		caURL:      caURL,
		mgr:        am,
		acmeClient: client,
	}

	return c, nil
}

// initialChallenges reports which challenge types should be attempted,
// based on the settings of c.mgr. When a DNS provider is configured the
// DNS challenge is the only one returned; otherwise the HTTP and TLS-ALPN
// challenges are returned (in that order), each only if not disabled.
// The result is nil when both of them are disabled.
func (c *acmeClient) initialChallenges() []challenge.Type {
	mgr := c.mgr

	// a configured DNS provider means the DNS challenge is used exclusively
	if mgr.DNSProvider != nil {
		return []challenge.Type{challenge.DNS01}
	}

	candidates := []struct {
		disabled bool
		kind     challenge.Type
	}{
		{mgr.DisableHTTPChallenge, challenge.HTTP01},
		{mgr.DisableTLSALPNChallenge, challenge.TLSALPN01},
	}

	var enabled []challenge.Type
	for _, cand := range candidates {
		if cand.disabled {
			continue
		}
		enabled = append(enabled, cand.kind)
	}
	return enabled
}

// nextChallenge picks one challenge type at random from available,
// removes it from that list, and reconfigures c.acmeClient so that the
// picked type is the only challenge enabled on it, with a solver built
// from the settings in c.mgr. It returns the picked challenge along with
// the shortened list. The removal is done in place, so the caller's
// slice contents are shifted. If available is empty, nothing is changed
// and an empty challenge type is returned.
//
// We do the selection ourselves because lego no longer randomizes the
// challenge order and, at time of writing, only ever tries the TLS-ALPN
// challenge even if the HTTP challenge is enabled too; see
// https://github.com/go-acme/lego/issues/842. Enabling exactly one
// randomly chosen challenge at a time lets a different one be used if
// the first one fails.
func (c *acmeClient) nextChallenge(available []challenge.Type) (challenge.Type, []challenge.Type) {
	count := len(available)
	if count == 0 {
		return "", available
	}

	// pick one at random and pop it so that it is not retried
	pick := weakrand.Intn(count)
	chosen := available[pick]
	copy(available[pick:], available[pick+1:])
	available = available[:count-1]

	// lego clients are reused, so start from a clean slate
	for _, kind := range []challenge.Type{challenge.HTTP01, challenge.TLSALPN01, challenge.DNS01} {
		c.acmeClient.Challenge.Remove(kind)
	}

	switch chosen {
	case challenge.DNS01:
		if c.mgr.DNSChallengeOption == nil {
			c.acmeClient.Challenge.SetDNS01Provider(c.mgr.DNSProvider)
		} else {
			c.acmeClient.Challenge.SetDNS01Provider(c.mgr.DNSProvider, c.mgr.DNSChallengeOption)
		}

	case challenge.TLSALPN01:
		// port precedence: manager's alternate port, then the
		// package-level HTTPS port, then the standard challenge port
		port := TLSALPNChallengePort
		switch {
		case c.mgr.AltTLSALPNPort > 0:
			port = c.mgr.AltTLSALPNPort
		case HTTPSPort > 0 && HTTPSPort != TLSALPNChallengePort:
			port = HTTPSPort
		}
		listenAddr := net.JoinHostPort(c.mgr.ListenHost, strconv.Itoa(port))

		c.acmeClient.Challenge.SetTLSALPN01Provider(distributedSolver{
			acmeManager: c.mgr,
			providerServer: &tlsALPNSolver{
				config:  c.mgr.config,
				address: listenAddr,
			},
			caURL: c.caURL,
		})

	case challenge.HTTP01:
		// port precedence: manager's alternate port, then the
		// package-level HTTP port, then the standard challenge port
		port := HTTPChallengePort
		switch {
		case c.mgr.AltHTTPPort > 0:
			port = c.mgr.AltHTTPPort
		case HTTPPort > 0 && HTTPPort != HTTPChallengePort:
			port = HTTPPort
		}
		listenAddr := net.JoinHostPort(c.mgr.ListenHost, strconv.Itoa(port))

		c.acmeClient.Challenge.SetHTTP01Provider(distributedSolver{
			acmeManager: c.mgr,
			providerServer: &httpSolver{
				acmeManager: c.mgr,
				address:     listenAddr,
			},
			caURL: c.caURL,
		})
	}

	return chosen, available
}

// throttle blocks until the internal rate limiter for this client's
// CA URL and the manager's configured email permits another event, or
// until ctx is done, in which case the error from the limiter's Wait
// is returned. The limiter is created on first use. names is used
// only to label the log messages.
func (c *acmeClient) throttle(ctx context.Context, names []string) error {
	// look up (or lazily create) the limiter while holding the lock
	limiter := func() *RingBufferRateLimiter {
		// throttling is scoped to CA + account email
		key := c.caURL + "," + c.mgr.Email

		rateLimitersMu.Lock()
		defer rateLimitersMu.Unlock()

		if existing, found := rateLimiters[key]; found {
			return existing
		}
		created := NewRateLimiter(RateLimitEvents, RateLimitEventsWindow)
		rateLimiters[key] = created
		// TODO: stop rate limiter when it is garbage-collected...
		return created
	}()

	log.Printf("[INFO]%v Waiting on rate limiter...", names)
	if err := limiter.Wait(ctx); err != nil {
		return err
	}
	log.Printf("[INFO]%v Done waiting", names)
	return nil
}

// usingTestCA reports whether this client's CA URL is the test CA
// configured on its manager. It is always false when the manager has
// no TestCA set.
func (c *acmeClient) usingTestCA() bool {
	testCA := c.mgr.TestCA
	if testCA == "" {
		return false
	}
	return c.caURL == testCA
}

// revoke asks the CA to revoke the certificate contained in certRes,
// using the underlying lego client. The context argument is accepted
// but not used.
func (c *acmeClient) revoke(_ context.Context, certRes certificate.Resource) error {
	certBytes := certRes.Certificate
	return c.acmeClient.Certificate.Revoke(certBytes)
}

// buildUAString returns the User-Agent value for ACME requests: the
// fixed product name "CertMagic", followed by a space and the
// package-level UserAgent when that is non-empty.
func buildUAString() string {
	const product = "CertMagic"
	if UserAgent == "" {
		return product
	}
	return product + " " + UserAgent
}

// These internal rate limits are designed to prevent accidentally
// firehosing a CA's ACME endpoints. They are not intended to
// replace or replicate the CA's actual rate limits.
//
// Let's Encrypt's rate limits can be found here:
// https://letsencrypt.org/docs/rate-limits/
//
// Currently (as of December 2019), Let's Encrypt's most relevant
// rate limit for large deployments is 300 new orders per account
// per 3 hours (on average, or best case, that's about 1 every 36
// seconds, or 2 every 72 seconds, etc.); but it's not reasonable
// to try to assume that our internal state is the same as the CA's
// (due to process restarts, config changes, failed validations,
// etc.) and ultimately, only the CA's actual rate limiter is the
// authority. Thus, our own rate limiters do not attempt to enforce
// external rate limits. Doing so causes problems when the domains
// are not in our control (i.e. serving customer sites) and/or lots
// of domains fail validation: they clog our internal rate limiter
// and nearly starve out (or at least slow down) the other domains
// that need certificates. Failed transactions are already retried
// with exponential backoff, so adding in rate limiting can slow
// things down even more.
//
// Instead, the point of our internal rate limiter is to avoid
// hammering the CA's endpoint when there are thousands or even
// millions of certificates under management. Our goal is to
// allow small bursts in a relatively short timeframe so as to
// not block any one domain for too long, without unleashing
// thousands of requests to the CA at once.
var (
	// rateLimiters holds one rate limiter per "CA URL,manager email"
	// key; entries are created lazily by acmeClient.throttle and are
	// never removed. rateLimitersMu guards the map.
	rateLimiters   = make(map[string]*RingBufferRateLimiter)
	rateLimitersMu sync.RWMutex

	// RateLimitEvents is how many new events can be allowed
	// in RateLimitEventsWindow.
	RateLimitEvents = 10

	// RateLimitEventsWindow is the size of the sliding
	// window that throttles events.
	RateLimitEventsWindow = 1 * time.Minute
)

// Some default values passed down to the underlying lego client.
var (
	// UserAgent, if non-empty, is appended (after a space) to the
	// "CertMagic" product name in the User-Agent given to lego.
	// HTTPTimeout is the timeout set on the HTTP client of each
	// newly created lego client.
	UserAgent   string
	HTTPTimeout = 30 * time.Second
)

// We keep a global cache of lego ACME clients so that
// they can be reused. Entries are keyed by the CA
// directory URL concatenated with the account email.
// Since the number of CAs and accounts should be fairly
// limited under best practices, this map will hardly
// ever have more than a few entries at the most. The
// associated lock protects access to the map but also
// ensures that only one ACME client is created at a time.
// TODO: consider using storage for a distributed lock
// TODO: consider evicting clients after some time
var (
	acmeClients   = make(map[string]*lego.Client)
	acmeClientsMu sync.Mutex
)